diff options
author | crupest <crupest@outlook.com> | 2021-10-30 21:08:43 +0800 |
---|---|---|
committer | crupest <crupest@outlook.com> | 2021-10-30 21:08:43 +0800 |
commit | e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef (patch) | |
tree | 92c7efcef195da667e54566fb41ed16bf1975649 /include/cru/common | |
parent | 562092ce48d586c786a0649a80ae0b42a301c0b5 (diff) | |
download | cru-e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef.tar.gz cru-e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef.tar.bz2 cru-e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef.zip |
...
Diffstat (limited to 'include/cru/common')
-rw-r--r-- | include/cru/common/String.hpp | 2 | ||||
-rw-r--r-- | include/cru/common/StringUtil.hpp | 78 |
2 files changed, 78 insertions, 2 deletions
```diff
diff --git a/include/cru/common/String.hpp b/include/cru/common/String.hpp
index c05ab6e0..544d24a4 100644
--- a/include/cru/common/String.hpp
+++ b/include/cru/common/String.hpp
@@ -171,6 +171,8 @@ class CRU_BASE_API String {
   }
 
  public:
+  void AppendCodePoint(CodePoint code_point);
+
   Utf16CodePointIterator CodePointIterator() const {
     return Utf16CodePointIterator(
         std::u16string_view(reinterpret_cast<char16_t*>(buffer_), size_));
diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp
index 4291a0da..cd2f4e16 100644
--- a/include/cru/common/StringUtil.hpp
+++ b/include/cru/common/StringUtil.hpp
@@ -2,6 +2,7 @@
 #include "Base.hpp"
 
 #include <functional>
+#include <stdexcept>
 #include <string>
 #include <string_view>
 
@@ -121,8 +122,81 @@ using Utf16CodePointIterator =
 
 void CRU_BASE_API Utf8EncodeCodePointAppend(CodePoint code_point,
                                             std::string& str);
-void CRU_BASE_API Utf16EncodeCodePointAppend(CodePoint code_point,
-                                             std::u16string& str);
+
+namespace details {
+template <typename UInt, int number_of_bit, typename ReturnType>
+inline std::enable_if_t<std::is_unsigned_v<UInt>, ReturnType> ExtractBits(
+    UInt n) {
+  return static_cast<ReturnType>(n & ((1u << number_of_bit) - 1));
+}
+}  // namespace details
+
+template <typename TAppend>
+bool Utf8EncodeCodePointAppendWithFunc(CodePoint code_point, TAppend&& append) {
+  auto write_continue_byte = [&append](std::uint8_t byte6) {
+    append((1u << 7) + (((1u << 6) - 1) & byte6));
+  };
+
+  if (code_point >= 0 && code_point <= 0x007F) {
+    append(static_cast<char>(code_point));
+    return true;
+  } else if (code_point >= 0x0080 && code_point <= 0x07FF) {
+    std::uint32_t unsigned_code_point = code_point;
+    append(
+        static_cast<char>(details::ExtractBits<std::uint32_t, 5, std::uint8_t>(
+                              (unsigned_code_point >> 6)) +
+                          0b11000000));
+    write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
+        unsigned_code_point));
+    return true;
+  } else if (code_point >= 0x0800 && code_point <= 0xFFFF) {
+    std::uint32_t unsigned_code_point = code_point;
+    append(
+        static_cast<char>(details::ExtractBits<std::uint32_t, 4, std::uint8_t>(
+                              (unsigned_code_point >> (6 * 2))) +
+                          0b11100000));
+    write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
+        unsigned_code_point >> 6));
+    write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
+        unsigned_code_point));
+    return true;
+  } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
+    std::uint32_t unsigned_code_point = code_point;
+    append(
+        static_cast<char>(details::ExtractBits<std::uint32_t, 3, std::uint8_t>(
+                              (unsigned_code_point >> (6 * 3))) +
+                          0b11110000));
+    write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
+        unsigned_code_point >> (6 * 2)));
+    write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
+        unsigned_code_point >> 6));
+    write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
+        unsigned_code_point));
+    return true;
+  } else {
+    return false;
+  }
+}
+
+template <typename TAppend>
+bool Utf16EncodeCodePointAppendWithFunc(CodePoint code_point,
+                                        TAppend&& append) {
+  if ((code_point >= 0 && code_point <= 0xD7FF) ||
+      (code_point >= 0xE000 && code_point <= 0xFFFF)) {
+    append(static_cast<char16_t>(code_point));
+    return true;
+  } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
+    std::uint32_t u = code_point - 0x10000;
+    append(static_cast<char16_t>(
+        details::ExtractBits<std::uint32_t, 10, std::uint32_t>(u >> 10) +
+        0xD800u));
+    append(static_cast<char16_t>(
+        details::ExtractBits<std::uint32_t, 10, std::uint32_t>(u) + 0xDC00u));
+    return true;
+  } else {
+    return false;
+  }
+}
 
 std::string CRU_BASE_API ToUtf8(std::u16string_view s);
 std::u16string CRU_BASE_API ToUtf16(std::string_view s);
```