From e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef Mon Sep 17 00:00:00 2001 From: crupest Date: Sat, 30 Oct 2021 21:08:43 +0800 Subject: ... --- include/cru/common/String.hpp | 2 + include/cru/common/StringUtil.hpp | 78 ++++++++++++++++++++++++++++++++++++++- src/common/String.cpp | 8 ++++ src/common/StringUtil.cpp | 59 +++-------------------------- src/osx/Convert.cpp | 4 +- 5 files changed, 93 insertions(+), 58 deletions(-) diff --git a/include/cru/common/String.hpp b/include/cru/common/String.hpp index c05ab6e0..544d24a4 100644 --- a/include/cru/common/String.hpp +++ b/include/cru/common/String.hpp @@ -171,6 +171,8 @@ class CRU_BASE_API String { } public: + void AppendCodePoint(CodePoint code_point); + Utf16CodePointIterator CodePointIterator() const { return Utf16CodePointIterator( std::u16string_view(reinterpret_cast(buffer_), size_)); diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp index 4291a0da..cd2f4e16 100644 --- a/include/cru/common/StringUtil.hpp +++ b/include/cru/common/StringUtil.hpp @@ -2,6 +2,7 @@ #include "Base.hpp" #include +#include #include #include @@ -121,8 +122,81 @@ using Utf16CodePointIterator = void CRU_BASE_API Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str); -void CRU_BASE_API Utf16EncodeCodePointAppend(CodePoint code_point, - std::u16string& str); + +namespace details { +template +inline std::enable_if_t, ReturnType> ExtractBits( + UInt n) { + return static_cast(n & ((1u << number_of_bit) - 1)); +} +} // namespace details + +template +bool Utf8EncodeCodePointAppendWithFunc(CodePoint code_point, TAppend&& append) { + auto write_continue_byte = [&append](std::uint8_t byte6) { + append((1u << 7) + (((1u << 6) - 1) & byte6)); + }; + + if (code_point >= 0 && code_point <= 0x007F) { + append(static_cast(code_point)); + return true; + } else if (code_point >= 0x0080 && code_point <= 0x07FF) { + std::uint32_t unsigned_code_point = code_point; + append( + static_cast(details::ExtractBits( + (unsigned_code_point >> 6)) + + 0b11000000)); + write_continue_byte(details::ExtractBits( + unsigned_code_point)); + return true; + } else if (code_point >= 0x0800 && code_point <= 0xFFFF) { + std::uint32_t unsigned_code_point = code_point; + append( + static_cast(details::ExtractBits( + (unsigned_code_point >> (6 * 2))) + + 0b11100000)); + write_continue_byte(details::ExtractBits( + unsigned_code_point >> 6)); + write_continue_byte(details::ExtractBits( + unsigned_code_point)); + return true; + } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { + std::uint32_t unsigned_code_point = code_point; + append( + static_cast(details::ExtractBits( + (unsigned_code_point >> (6 * 3))) + + 0b11110000)); + write_continue_byte(details::ExtractBits( + unsigned_code_point >> (6 * 2))); + write_continue_byte(details::ExtractBits( + unsigned_code_point >> 6)); + write_continue_byte(details::ExtractBits( + unsigned_code_point)); + return true; + } else { + return false; + } +} + +template +bool Utf16EncodeCodePointAppendWithFunc(CodePoint code_point, + TAppend&& append) { + if ((code_point >= 0 && code_point <= 0xD7FF) || + (code_point >= 0xE000 && code_point <= 0xFFFF)) { + append(static_cast(code_point)); + return true; + } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { + std::uint32_t u = code_point - 0x10000; + append(static_cast( + details::ExtractBits(u >> 10) + + 0xD800u)); + append(static_cast( + details::ExtractBits(u) + 0xDC00u)); + return true; + } else { + return false; + } +} std::string CRU_BASE_API ToUtf8(std::u16string_view s); std::u16string CRU_BASE_API ToUtf16(std::string_view s); diff --git a/src/common/String.cpp b/src/common/String.cpp index 699d807f..ba31e6f6 100644 --- a/src/common/String.cpp +++ b/src/common/String.cpp @@ -1,4 +1,5 @@ #include "cru/common/String.hpp" +#include "cru/common/Exception.hpp" #include "cru/common/StringUtil.hpp" #include @@ -214,6 +215,13 @@ std::string String::ToUtf8() const { return cru::ToUtf8(std::u16string_view(data(), size())); } +void String::AppendCodePoint(CodePoint code_point) { + if (!Utf16EncodeCodePointAppendWithFunc( + code_point, [this](char16_t c) { this->push_back(c); })) { + throw TextEncodeException(u"Code point out of range."); + } +} + Index String::IndexFromCodeUnitToCodePoint(Index code_unit_index) const { auto iter = CodePointIterator(); Index result = 0; diff --git a/src/common/StringUtil.cpp b/src/common/StringUtil.cpp index b1f1ed4b..7492bdfd 100644 --- a/src/common/StringUtil.cpp +++ b/src/common/StringUtil.cpp @@ -1,15 +1,10 @@ #include "cru/common/StringUtil.hpp" +#include #include "cru/common/Base.hpp" #include "cru/common/Exception.hpp" namespace cru { -namespace { -template -inline std::enable_if_t, ReturnType> ExtractBits( - UInt n) { - return static_cast(n & ((1u << number_of_bit) - 1)); -} -} // namespace +using details::ExtractBits; CodePoint Utf8NextCodePoint(std::string_view str, Index current, Index* next_position) { @@ -154,57 +149,15 @@ CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, } void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str) { - auto write_continue_byte = [&str](std::uint8_t byte6) { - str.push_back((1u << 7) + (((1u << 6) - 1) & byte6)); - }; - - if (code_point >= 0 && code_point <= 0x007F) { - str.push_back(static_cast(code_point)); - } else if (code_point >= 0x0080 && code_point <= 0x07FF) { - std::uint32_t unsigned_code_point = code_point; - str.push_back(static_cast(ExtractBits( - (unsigned_code_point >> 6)) + - 0b11000000)); - write_continue_byte( - ExtractBits(unsigned_code_point)); - } else if (code_point >= 0x0800 && code_point <= 0xFFFF) { - std::uint32_t unsigned_code_point = code_point; - str.push_back(static_cast(ExtractBits( - (unsigned_code_point >> (6 * 2))) + - 0b11100000)); - write_continue_byte( - ExtractBits(unsigned_code_point >> 6)); - write_continue_byte( - ExtractBits(unsigned_code_point)); - } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { - std::uint32_t unsigned_code_point = code_point; - str.push_back(static_cast(ExtractBits( - (unsigned_code_point >> (6 * 3))) + - 0b11110000)); - write_continue_byte(ExtractBits( - unsigned_code_point >> (6 * 2))); - write_continue_byte( - ExtractBits(unsigned_code_point >> 6)); - write_continue_byte( - ExtractBits(unsigned_code_point)); - } else { + if (!Utf8EncodeCodePointAppendWithFunc(code_point, + [&str](char c) { str.push_back(c); })) throw TextEncodeException(u"Code point out of range."); - } } void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str) { - if ((code_point >= 0 && code_point <= 0xD7FF) || - (code_point >= 0xE000 && code_point <= 0xFFFF)) { - str.push_back(static_cast(code_point)); - } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { - std::uint32_t u = code_point - 0x10000; - str.push_back(static_cast( - ExtractBits(u >> 10) + 0xD800u)); - str.push_back(static_cast( - ExtractBits(u) + 0xDC00u)); - } else { + if (!Utf16EncodeCodePointAppendWithFunc( + code_point, [&str](char16_t c) { str.push_back(c); })) throw TextEncodeException(u"Code point out of range."); - } } std::string ToUtf8(std::u16string_view s) { diff --git a/src/osx/Convert.cpp b/src/osx/Convert.cpp index 6bec5adc..6e9692f2 100644 --- a/src/osx/Convert.cpp +++ b/src/osx/Convert.cpp @@ -17,9 +17,7 @@ String Convert(CFStringRef string) { String result; for (int i = 0; i < length; i++) { - std::u16string s; - Utf16EncodeCodePointAppend(CFStringGetCharacterAtIndex(string, i), s); - result.append(s.data(), s.size()); + result.AppendCodePoint(CFStringGetCharacterAtIndex(string, i)); } return result; -- cgit v1.2.3