aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/cru/common/String.hpp2
-rw-r--r--include/cru/common/StringUtil.hpp78
-rw-r--r--src/common/String.cpp8
-rw-r--r--src/common/StringUtil.cpp59
-rw-r--r--src/osx/Convert.cpp4
5 files changed, 93 insertions, 58 deletions
diff --git a/include/cru/common/String.hpp b/include/cru/common/String.hpp
index c05ab6e0..544d24a4 100644
--- a/include/cru/common/String.hpp
+++ b/include/cru/common/String.hpp
@@ -171,6 +171,8 @@ class CRU_BASE_API String {
}
public:
+ void AppendCodePoint(CodePoint code_point);
+
Utf16CodePointIterator CodePointIterator() const {
return Utf16CodePointIterator(
std::u16string_view(reinterpret_cast<char16_t*>(buffer_), size_));
diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp
index 4291a0da..cd2f4e16 100644
--- a/include/cru/common/StringUtil.hpp
+++ b/include/cru/common/StringUtil.hpp
@@ -2,6 +2,7 @@
#include "Base.hpp"
#include <functional>
+#include <stdexcept>
#include <string>
#include <string_view>
@@ -121,8 +122,81 @@ using Utf16CodePointIterator =
void CRU_BASE_API Utf8EncodeCodePointAppend(CodePoint code_point,
std::string& str);
-void CRU_BASE_API Utf16EncodeCodePointAppend(CodePoint code_point,
- std::u16string& str);
+
+namespace details {
+// Returns the lowest `number_of_bit` bits of `n`, converted to `ReturnType`.
+//
+// Only participates in overload resolution when `UInt` is an unsigned type.
+// The mask is computed in `UInt` rather than in `unsigned int`, so the helper
+// also works for operand types wider than `unsigned int`. A bit count that is
+// negative or not smaller than the size of `UInt` (taken as 8 bits per byte)
+// is rejected at compile time instead of producing an undefined shift.
+template <typename UInt, int number_of_bit, typename ReturnType>
+inline std::enable_if_t<std::is_unsigned_v<UInt>, ReturnType> ExtractBits(
+    UInt n) {
+  static_assert(number_of_bit >= 0 &&
+                    number_of_bit < static_cast<int>(sizeof(UInt) * 8),
+                "number_of_bit must be in [0, bit width of UInt)");
+  constexpr UInt mask = static_cast<UInt>((UInt{1} << number_of_bit) - 1);
+  return static_cast<ReturnType>(n & mask);
+}
+}  // namespace details
+
+// Encodes `code_point` as UTF-8 and hands the bytes, in order, to `append`.
+//
+// `append` is invoked once per encoded byte with a `char` argument (one to
+// four calls). Returns true when the code point was encoded. Returns false,
+// without calling `append`, when `code_point` is negative, above 0x10FFFF, or
+// a UTF-16 surrogate (0xD800-0xDFFF): surrogates are not Unicode scalar values
+// and have no well-formed UTF-8 form, and the UTF-16 encoder in this header
+// rejects them too.
+template <typename TAppend>
+bool Utf8EncodeCodePointAppendWithFunc(CodePoint code_point, TAppend&& append) {
+  const bool is_surrogate = code_point >= 0xD800 && code_point <= 0xDFFF;
+  if (code_point < 0 || code_point > 0x10FFFF || is_surrogate) {
+    return false;
+  }
+
+  // Non-negative from here on, so the shifts below act on an unsigned value.
+  const auto cp = static_cast<std::uint32_t>(code_point);
+
+  // Continuation byte: 0b10xxxxxx carrying the low six bits of `bits`.
+  auto append_continuation = [&append](std::uint32_t bits) {
+    append(static_cast<char>(
+        0b10000000u |
+        details::ExtractBits<std::uint32_t, 6, std::uint8_t>(bits)));
+  };
+
+  if (cp <= 0x007F) {
+    // One byte: 0xxxxxxx.
+    append(static_cast<char>(cp));
+  } else if (cp <= 0x07FF) {
+    // Two bytes: 110xxxxx 10xxxxxx.
+    append(static_cast<char>(
+        0b11000000u |
+        details::ExtractBits<std::uint32_t, 5, std::uint8_t>(cp >> 6)));
+    append_continuation(cp);
+  } else if (cp <= 0xFFFF) {
+    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
+    append(static_cast<char>(
+        0b11100000u |
+        details::ExtractBits<std::uint32_t, 4, std::uint8_t>(cp >> (6 * 2))));
+    append_continuation(cp >> 6);
+    append_continuation(cp);
+  } else {
+    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+    append(static_cast<char>(
+        0b11110000u |
+        details::ExtractBits<std::uint32_t, 3, std::uint8_t>(cp >> (6 * 3))));
+    append_continuation(cp >> (6 * 2));
+    append_continuation(cp >> 6);
+    append_continuation(cp);
+  }
+  return true;
+}
+
+// Encodes `code_point` as UTF-16 and hands the code units, in order, to
+// `append` (called with a `char16_t` argument).
+//
+// Code points up to 0xFFFF outside the surrogate range produce one code unit;
+// code points in 0x10000-0x10FFFF produce a high/low surrogate pair. Returns
+// true after emitting, or false without calling `append` when `code_point` is
+// negative, above 0x10FFFF, or inside 0xD800-0xDFFF.
+template <typename TAppend>
+bool Utf16EncodeCodePointAppendWithFunc(CodePoint code_point,
+                                        TAppend&& append) {
+  if (code_point < 0 || code_point > 0x10FFFF) {
+    return false;
+  }
+  if (code_point >= 0xD800 && code_point <= 0xDFFF) {
+    return false;
+  }
+
+  if (code_point <= 0xFFFF) {
+    append(static_cast<char16_t>(code_point));
+    return true;
+  }
+
+  // Supplementary plane: split the 20-bit offset into two 10-bit halves.
+  const std::uint32_t offset = code_point - 0x10000;
+  const std::uint32_t high_bits = (offset >> 10) & 0x3FFu;
+  const std::uint32_t low_bits = offset & 0x3FFu;
+  append(static_cast<char16_t>(high_bits + 0xD800u));
+  append(static_cast<char16_t>(low_bits + 0xDC00u));
+  return true;
+}
std::string CRU_BASE_API ToUtf8(std::u16string_view s);
std::u16string CRU_BASE_API ToUtf16(std::string_view s);
diff --git a/src/common/String.cpp b/src/common/String.cpp
index 699d807f..ba31e6f6 100644
--- a/src/common/String.cpp
+++ b/src/common/String.cpp
@@ -1,4 +1,5 @@
#include "cru/common/String.hpp"
+#include "cru/common/Exception.hpp"
#include "cru/common/StringUtil.hpp"
#include <gsl/gsl>
@@ -214,6 +215,13 @@ std::string String::ToUtf8() const {
return cru::ToUtf8(std::u16string_view(data(), size()));
}
+// Appends `code_point` to this string as UTF-16 code units.
+//
+// Each code unit produced by Utf16EncodeCodePointAppendWithFunc is pushed
+// onto the string; throws TextEncodeException when that helper returns false.
+void String::AppendCodePoint(CodePoint code_point) {
+  const bool encoded = Utf16EncodeCodePointAppendWithFunc(
+      code_point, [this](char16_t unit) { this->push_back(unit); });
+  if (encoded) {
+    return;
+  }
+  throw TextEncodeException(u"Code point out of range.");
+}
+
Index String::IndexFromCodeUnitToCodePoint(Index code_unit_index) const {
auto iter = CodePointIterator();
Index result = 0;
diff --git a/src/common/StringUtil.cpp b/src/common/StringUtil.cpp
index b1f1ed4b..7492bdfd 100644
--- a/src/common/StringUtil.cpp
+++ b/src/common/StringUtil.cpp
@@ -1,15 +1,10 @@
#include "cru/common/StringUtil.hpp"
+#include <functional>
#include "cru/common/Base.hpp"
#include "cru/common/Exception.hpp"
namespace cru {
-namespace {
-template <typename UInt, int number_of_bit, typename ReturnType>
-inline std::enable_if_t<std::is_unsigned_v<UInt>, ReturnType> ExtractBits(
- UInt n) {
- return static_cast<ReturnType>(n & ((1u << number_of_bit) - 1));
-}
-} // namespace
+using details::ExtractBits;
CodePoint Utf8NextCodePoint(std::string_view str, Index current,
Index* next_position) {
@@ -154,57 +149,15 @@ CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current,
}
+// Appends the UTF-8 encoding of `code_point` to `str`.
+//
+// Delegates to Utf8EncodeCodePointAppendWithFunc with a callback that pushes
+// each byte onto `str`; throws TextEncodeException when that helper returns
+// false.
void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str) {
- auto write_continue_byte = [&str](std::uint8_t byte6) {
- str.push_back((1u << 7) + (((1u << 6) - 1) & byte6));
- };
-
- if (code_point >= 0 && code_point <= 0x007F) {
- str.push_back(static_cast<char>(code_point));
- } else if (code_point >= 0x0080 && code_point <= 0x07FF) {
- std::uint32_t unsigned_code_point = code_point;
- str.push_back(static_cast<char>(ExtractBits<std::uint32_t, 5, std::uint8_t>(
- (unsigned_code_point >> 6)) +
- 0b11000000));
- write_continue_byte(
- ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point));
- } else if (code_point >= 0x0800 && code_point <= 0xFFFF) {
- std::uint32_t unsigned_code_point = code_point;
- str.push_back(static_cast<char>(ExtractBits<std::uint32_t, 4, std::uint8_t>(
- (unsigned_code_point >> (6 * 2))) +
- 0b11100000));
- write_continue_byte(
- ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point >> 6));
- write_continue_byte(
- ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point));
- } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
- std::uint32_t unsigned_code_point = code_point;
- str.push_back(static_cast<char>(ExtractBits<std::uint32_t, 3, std::uint8_t>(
- (unsigned_code_point >> (6 * 3))) +
- 0b11110000));
- write_continue_byte(ExtractBits<std::uint32_t, 6, std::uint8_t>(
- unsigned_code_point >> (6 * 2)));
- write_continue_byte(
- ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point >> 6));
- write_continue_byte(
- ExtractBits<std::uint32_t, 6, std::uint8_t>(unsigned_code_point));
- } else {
+ if (!Utf8EncodeCodePointAppendWithFunc(code_point,
+ [&str](char c) { str.push_back(c); }))
throw TextEncodeException(u"Code point out of range.");
- }
}
+// Appends the UTF-16 encoding of `code_point` to `str`.
+//
+// Delegates to Utf16EncodeCodePointAppendWithFunc with a callback that pushes
+// each code unit onto `str`; throws TextEncodeException when that helper
+// returns false.
+//
+// NOTE(review): this change removes the Utf16EncodeCodePointAppend
+// declaration from StringUtil.hpp while keeping this definition. Confirm that
+// is intended.
void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str) {
- if ((code_point >= 0 && code_point <= 0xD7FF) ||
- (code_point >= 0xE000 && code_point <= 0xFFFF)) {
- str.push_back(static_cast<char16_t>(code_point));
- } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
- std::uint32_t u = code_point - 0x10000;
- str.push_back(static_cast<char16_t>(
- ExtractBits<std::uint32_t, 10, std::uint32_t>(u >> 10) + 0xD800u));
- str.push_back(static_cast<char16_t>(
- ExtractBits<std::uint32_t, 10, std::uint32_t>(u) + 0xDC00u));
- } else {
+ if (!Utf16EncodeCodePointAppendWithFunc(
+ code_point, [&str](char16_t c) { str.push_back(c); }))
throw TextEncodeException(u"Code point out of range.");
- }
}
std::string ToUtf8(std::u16string_view s) {
diff --git a/src/osx/Convert.cpp b/src/osx/Convert.cpp
index 6bec5adc..6e9692f2 100644
--- a/src/osx/Convert.cpp
+++ b/src/osx/Convert.cpp
@@ -17,9 +17,7 @@ String Convert(CFStringRef string) {
String result;
for (int i = 0; i < length; i++) {
- std::u16string s;
- Utf16EncodeCodePointAppend(CFStringGetCharacterAtIndex(string, i), s);
- result.append(s.data(), s.size());
+ result.AppendCodePoint(CFStringGetCharacterAtIndex(string, i));
}
return result;