diff options
author | crupest <crupest@outlook.com> | 2021-10-30 21:08:43 +0800 |
---|---|---|
committer | crupest <crupest@outlook.com> | 2021-10-30 21:08:43 +0800 |
commit | e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef (patch) | |
tree | 92c7efcef195da667e54566fb41ed16bf1975649 /src/common/StringUtil.cpp | |
parent | 562092ce48d586c786a0649a80ae0b42a301c0b5 (diff) | |
download | cru-e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef.tar.gz cru-e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef.tar.bz2 cru-e68e0d9a5130e8bc0b634572b7fd44b9bfc0f8ef.zip |
...
Diffstat (limited to 'src/common/StringUtil.cpp')
-rw-r--r-- | src/common/StringUtil.cpp | 59 |
1 files changed, 6 insertions, 53 deletions
diff --git a/src/common/StringUtil.cpp b/src/common/StringUtil.cpp index b1f1ed4b..7492bdfd 100644 --- a/src/common/StringUtil.cpp +++ b/src/common/StringUtil.cpp @@ -1,15 +1,10 @@ #include "cru/common/StringUtil.hpp" +#include <functional> #include "cru/common/Base.hpp" #include "cru/common/Exception.hpp" namespace cru { -namespace { -template <typename UInt, int number_of_bit, typename ReturnType> -inline std::enable_if_t<std::is_unsigned_v<UInt>, ReturnType> ExtractBits( - UInt n) { - return static_cast<ReturnType>(n & ((1u << number_of_bit) - 1)); -} -} // namespace +using details::ExtractBits; CodePoint Utf8NextCodePoint(std::string_view str, Index current, Index* next_position) { @@ -154,57 +149,15 @@ CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, } void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str) { - auto write_continue_byte = [&str](std::uint8_t byte6) { - str.push_back((1u << 7) + (((1u << 6) - 1) & byte6)); - }; - - if (code_point >= 0 && code_point <= 0x007F) { - str.push_back(static_cast<char>(code_point)); - } else if (code_point >= 0x0080 && code_point <= 0x07FF) { - std::uint32_t unsigned_code_point = code_point; - str.push_back(static_cast<char>(ExtractBits<std::uint32_t, 5, std::uint8_t>( - (unsigned_code_point >> 6)) + - 0b11000000)); - write_continue_byte( - ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point)); - } else if (code_point >= 0x0800 && code_point <= 0xFFFF) { - std::uint32_t unsigned_code_point = code_point; - str.push_back(static_cast<char>(ExtractBits<std::uint32_t, 4, std::uint8_t>( - (unsigned_code_point >> (6 * 2))) + - 0b11100000)); - write_continue_byte( - ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point >> 6)); - write_continue_byte( - ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point)); - } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { - std::uint32_t unsigned_code_point = code_point; - str.push_back(static_cast<char>(ExtractBits<std::uint32_t, 3, std::uint8_t>( - (unsigned_code_point >> (6 * 3))) + - 0b11110000)); - write_continue_byte(ExtractBits<std::uint32_t, 6, std::uint8_t>( - unsigned_code_point >> (6 * 2))); - write_continue_byte( - ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point >> 6)); - write_continue_byte( - ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point)); - } else { + if (!Utf8EncodeCodePointAppendWithFunc(code_point, + [&str](char c) { str.push_back(c); })) throw TextEncodeException(u"Code point out of range."); - } } void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str) { - if ((code_point >= 0 && code_point <= 0xD7FF) || - (code_point >= 0xE000 && code_point <= 0xFFFF)) { - str.push_back(static_cast<char16_t>(code_point)); - } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { - std::uint32_t u = code_point - 0x10000; - str.push_back(static_cast<char16_t>( - ExtractBits<std::uint32_t, 10, std::uint32_t>(u >> 10) + 0xD800u)); - str.push_back(static_cast<char16_t>( - ExtractBits<std::uint32_t, 10, std::uint32_t>(u) + 0xDC00u)); - } else { + if (!Utf16EncodeCodePointAppendWithFunc( + code_point, [&str](char16_t c) { str.push_back(c); })) throw TextEncodeException(u"Code point out of range."); - } } std::string ToUtf8(std::u16string_view s) { |