aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/cru/base/StringUtil.h284
-rw-r--r--src/base/StringUtil.cpp258
-rw-r--r--src/ui/controls/TextHostControlService.cpp39
-rw-r--r--test/base/StringUtilTest.cpp65
4 files changed, 332 insertions, 314 deletions
diff --git a/include/cru/base/StringUtil.h b/include/cru/base/StringUtil.h
index 7a88f7e8..54e7b6e6 100644
--- a/include/cru/base/StringUtil.h
+++ b/include/cru/base/StringUtil.h
@@ -6,6 +6,7 @@
#include <cctype>
#include <charconv>
#include <compare>
+#include <cstdint>
#include <format>
#include <functional>
#include <string>
@@ -14,8 +15,7 @@
#include <type_traits>
#include <vector>
-namespace cru {
-namespace string {
+namespace cru::string {
std::weak_ordering CaseInsensitiveCompare(std::string_view left,
std::string_view right);
std::string TrimBegin(std::string_view str);
@@ -137,32 +137,158 @@ struct ImplementFormatterByToString {
}
};
-} // namespace string
-
using CodePoint = std::int32_t;
+using Utf8CodeUnit = char;  // one UTF-8 code unit; plain char, so it may be a signed type
+using Utf16CodeUnit = char16_t;  // one UTF-16 code unit
constexpr CodePoint k_invalid_code_point = -1;
-inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) {
+inline bool IsUtf8LeadingByte(Utf8CodeUnit c) {  // True for every byte except those of the form 0b10xxxxxx.
+ return !(c & 0b10000000) || c & 0b01000000;  // top bit clear, or top two bits both set
+}
+
+inline bool IsUtf8FollowingByte(Utf8CodeUnit c) {  // Exact negation of IsUtf8LeadingByte: true only for 0b10xxxxxx.
+ return !IsUtf8LeadingByte(c);
+}
+
+inline bool IsUtf16SurrogatePairCodeUnit(Utf16CodeUnit c) {
return c >= 0xD800 && c <= 0xDFFF;
}
-inline bool IsUtf16SurrogatePairLeading(char16_t c) {
+inline bool IsUtf16SurrogatePairLeading(Utf16CodeUnit c) {
return c >= 0xD800 && c <= 0xDBFF;
}
-inline bool IsUtf16SurrogatePairTrailing(char16_t c) {
+inline bool IsUtf16SurrogatePairTrailing(Utf16CodeUnit c) {
return c >= 0xDC00 && c <= 0xDFFF;
}
CodePoint CRU_BASE_API Utf8NextCodePoint(const char* ptr, Index size,
Index current, Index* next_position);
-CodePoint CRU_BASE_API Utf16NextCodePoint(const char16_t* ptr, Index size,
+CodePoint CRU_BASE_API Utf8PreviousCodePoint(const char* ptr, Index size,
+ Index current,
+ Index* previous_position);
+
+namespace details {
+template <typename Integer, int number_of_bit, typename ReturnType>
+inline ReturnType ExtractBits(Integer n) {  // Keeps the lowest number_of_bit bits of n and casts them to ReturnType.
+ return static_cast<ReturnType>(n & ((1u << number_of_bit) - 1));  // the mask is an unsigned int, so n is converted to unsigned before the AND
+}
+} // namespace details
+
+template <typename CharWriter>
+std::enable_if_t<std::is_invocable_v<CharWriter, Utf8CodeUnit>, bool>
+Utf8EncodeCodePointAppend(CodePoint code_point, CharWriter&& writer) {  // Emits code_point as 1 to 4 bytes through writer; returns false without calling writer when code_point is outside [0, 0x10FFFF].
+ auto write_continue_byte = [&writer](Utf8CodeUnit byte6) {  // emits 0b10xxxxxx carrying the low 6 bits of byte6
+ writer((1u << 7) + (((1u << 6) - 1) & byte6));  // the value handed to writer is an unsigned int, not a Utf8CodeUnit
+ };
+
+ if (code_point >= 0 && code_point <= 0x007F) {  // 1 byte: 0xxxxxxx
+ writer(static_cast<Utf8CodeUnit>(code_point));
+ return true;
+ } else if (code_point >= 0x0080 && code_point <= 0x07FF) {  // 2 bytes: 110xxxxx 10xxxxxx
+ std::uint32_t unsigned_code_point = code_point;
+ writer(static_cast<Utf8CodeUnit>(
+ details::ExtractBits<std::uint32_t, 5, Utf8CodeUnit>(
+ (unsigned_code_point >> 6)) +
+ 0b11000000));
+ write_continue_byte(details::ExtractBits<std::uint32_t, 6, Utf8CodeUnit>(
+ unsigned_code_point));
+ return true;
+ } else if (code_point >= 0x0800 && code_point <= 0xFFFF) {  // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx. NOTE(review): this range includes 0xD800-0xDFFF, which Utf16EncodeCodePointAppend rejects - confirm that is intended.
+ std::uint32_t unsigned_code_point = code_point;
+ writer(static_cast<Utf8CodeUnit>(
+ details::ExtractBits<std::uint32_t, 4, Utf8CodeUnit>(
+ (unsigned_code_point >> (6 * 2))) +
+ 0b11100000));
+ write_continue_byte(details::ExtractBits<std::uint32_t, 6, Utf8CodeUnit>(
+ unsigned_code_point >> 6));
+ write_continue_byte(details::ExtractBits<std::uint32_t, 6, Utf8CodeUnit>(
+ unsigned_code_point));
+ return true;
+ } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {  // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ std::uint32_t unsigned_code_point = code_point;
+ writer(static_cast<Utf8CodeUnit>(
+ details::ExtractBits<std::uint32_t, 3, Utf8CodeUnit>(
+ (unsigned_code_point >> (6 * 3))) +
+ 0b11110000));
+ write_continue_byte(details::ExtractBits<std::uint32_t, 6, Utf8CodeUnit>(
+ unsigned_code_point >> (6 * 2)));
+ write_continue_byte(details::ExtractBits<std::uint32_t, 6, Utf8CodeUnit>(
+ unsigned_code_point >> 6));
+ write_continue_byte(details::ExtractBits<std::uint32_t, 6, Utf8CodeUnit>(
+ unsigned_code_point));
+ return true;
+ } else {  // negative or above 0x10FFFF: nothing is written
+ return false;
+ }
+}
+
+bool CRU_BASE_API Utf8IsValidInsertPosition(const Utf8CodeUnit* ptr, Index size,
+ Index position);
+
+// Returns the position just after the nearest preceding character for which
+// predicate returns true, or 0 if there is no such character.
+Index CRU_BASE_API
+Utf8BackwardUntil(const Utf8CodeUnit* ptr, Index size, Index position,
+ const std::function<bool(CodePoint)>& predicate);
+// Returns the position just before the nearest following character for which
+// predicate returns true, or size if there is no such character.
+Index CRU_BASE_API
+Utf8ForwardUntil(const Utf8CodeUnit* ptr, Index size, Index position,
+ const std::function<bool(CodePoint)>& predicate);
+
+Index CRU_BASE_API Utf8PreviousWord(const Utf8CodeUnit* ptr, Index size,
+ Index position, bool* is_space = nullptr);
+Index CRU_BASE_API Utf8NextWord(const Utf8CodeUnit* ptr, Index size,
+ Index position, bool* is_space = nullptr);
+
+CodePoint CRU_BASE_API Utf16NextCodePoint(const Utf16CodeUnit* ptr, Index size,
Index current, Index* next_position);
-CodePoint CRU_BASE_API Utf16PreviousCodePoint(const char16_t* ptr, Index size,
- Index current,
+CodePoint CRU_BASE_API Utf16PreviousCodePoint(const Utf16CodeUnit* ptr,
+ Index size, Index current,
Index* previous_position);
+template <typename CharWriter>
+std::enable_if_t<std::is_invocable_v<CharWriter, Utf16CodeUnit>, bool>
+Utf16EncodeCodePointAppend(CodePoint code_point, CharWriter&& writer) {  // Emits code_point as one or two UTF-16 code units through writer; returns false without calling writer otherwise.
+ if ((code_point >= 0 && code_point <= 0xD7FF) ||
+ (code_point >= 0xE000 && code_point <= 0xFFFF)) {  // single code unit; 0xD800-0xDFFF is deliberately left out of both ranges
+ writer(static_cast<Utf16CodeUnit>(code_point));
+ return true;
+ } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {  // surrogate pair
+ std::uint32_t u = code_point - 0x10000;  // 20-bit offset split into two 10-bit halves below
+ writer(static_cast<Utf16CodeUnit>(
+ details::ExtractBits<std::uint32_t, 10, std::uint32_t>(u >> 10) +
+ 0xD800u));  // first unit: upper 10 bits + 0xD800
+ writer(static_cast<Utf16CodeUnit>(
+ details::ExtractBits<std::uint32_t, 10, std::uint32_t>(u) + 0xDC00u));  // second unit: lower 10 bits + 0xDC00
+ return true;
+ } else {  // negative, inside 0xD800-0xDFFF, or above 0x10FFFF
+ return false;
+ }
+}
+
+// If the given buffer is not a valid UTF-16 string, the return value is
+// undefined.
+bool CRU_BASE_API Utf16IsValidInsertPosition(const Utf16CodeUnit* ptr,
+ Index size, Index position);
+
+// Returns the position just after the nearest preceding character for which
+// predicate returns true, or 0 if there is no such character.
+Index CRU_BASE_API
+Utf16BackwardUntil(const Utf16CodeUnit* ptr, Index size, Index position,
+ const std::function<bool(CodePoint)>& predicate);
+// Returns the position just before the nearest following character for which
+// predicate returns true, or size if there is no such character.
+Index CRU_BASE_API
+Utf16ForwardUntil(const Utf16CodeUnit* ptr, Index size, Index position,
+ const std::function<bool(CodePoint)>& predicate);
+
+Index CRU_BASE_API Utf16PreviousWord(const Utf16CodeUnit* ptr, Index size,
+ Index position, bool* is_space = nullptr);
+Index CRU_BASE_API Utf16NextWord(const Utf16CodeUnit* ptr, Index size,
+ Index position, bool* is_space = nullptr);
+
template <typename CharType>
using NextCodePointFunctionType = CodePoint (*)(const CharType*, Index, Index,
Index*);
@@ -247,138 +373,6 @@ class CodePointIterator {
};
using Utf8CodePointIterator = CodePointIterator<char, &Utf8NextCodePoint>;
-
-using Utf16CodePointIterator = CodePointIterator<char16_t, &Utf16NextCodePoint>;
-
-namespace details {
-template <typename UInt, int number_of_bit, typename ReturnType>
-inline std::enable_if_t<std::is_unsigned_v<UInt>, ReturnType> ExtractBits(
- UInt n) {
- return static_cast<ReturnType>(n & ((1u << number_of_bit) - 1));
-}
-} // namespace details
-
-template <typename CharWriter>
-std::enable_if_t<std::is_invocable_v<CharWriter, char>, bool>
-Utf8EncodeCodePointAppend(CodePoint code_point, CharWriter&& writer) {
- auto write_continue_byte = [&writer](std::uint8_t byte6) {
- writer((1u << 7) + (((1u << 6) - 1) & byte6));
- };
-
- if (code_point >= 0 && code_point <= 0x007F) {
- writer(static_cast<char>(code_point));
- return true;
- } else if (code_point >= 0x0080 && code_point <= 0x07FF) {
- std::uint32_t unsigned_code_point = code_point;
- writer(
- static_cast<char>(details::ExtractBits<std::uint32_t, 5, std::uint8_t>(
- (unsigned_code_point >> 6)) +
- 0b11000000));
- write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
- unsigned_code_point));
- return true;
- } else if (code_point >= 0x0800 && code_point <= 0xFFFF) {
- std::uint32_t unsigned_code_point = code_point;
- writer(
- static_cast<char>(details::ExtractBits<std::uint32_t, 4, std::uint8_t>(
- (unsigned_code_point >> (6 * 2))) +
- 0b11100000));
- write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
- unsigned_code_point >> 6));
- write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
- unsigned_code_point));
- return true;
- } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
- std::uint32_t unsigned_code_point = code_point;
- writer(
- static_cast<char>(details::ExtractBits<std::uint32_t, 3, std::uint8_t>(
- (unsigned_code_point >> (6 * 3))) +
- 0b11110000));
- write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
- unsigned_code_point >> (6 * 2)));
- write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
- unsigned_code_point >> 6));
- write_continue_byte(details::ExtractBits<std::uint32_t, 6, std::uint8_t>(
- unsigned_code_point));
- return true;
- } else {
- return false;
- }
-}
-
-template <typename CharWriter>
-std::enable_if_t<std::is_invocable_v<CharWriter, char16_t>, bool>
-Utf16EncodeCodePointAppend(CodePoint code_point, CharWriter&& writer) {
- if ((code_point >= 0 && code_point <= 0xD7FF) ||
- (code_point >= 0xE000 && code_point <= 0xFFFF)) {
- writer(static_cast<char16_t>(code_point));
- return true;
- } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
- std::uint32_t u = code_point - 0x10000;
- writer(static_cast<char16_t>(
- details::ExtractBits<std::uint32_t, 10, std::uint32_t>(u >> 10) +
- 0xD800u));
- writer(static_cast<char16_t>(
- details::ExtractBits<std::uint32_t, 10, std::uint32_t>(u) + 0xDC00u));
- return true;
- } else {
- return false;
- }
-}
-
-// If given s is not a valid utf16 string, return value is UD.
-bool CRU_BASE_API Utf16IsValidInsertPosition(const char16_t* ptr, Index size,
- Index position);
-
-// Return position after the character making predicate returns true or 0 if no
-// character doing so.
-Index CRU_BASE_API
-Utf16BackwardUntil(const char16_t* ptr, Index size, Index position,
- const std::function<bool(CodePoint)>& predicate);
-// Return position before the character making predicate returns true or
-// str.size() if no character doing so.
-Index CRU_BASE_API
-Utf16ForwardUntil(const char16_t* ptr, Index size, Index position,
- const std::function<bool(CodePoint)>& predicate);
-
-Index CRU_BASE_API Utf16PreviousWord(const char16_t* ptr, Index size,
- Index position, bool* is_space = nullptr);
-Index CRU_BASE_API Utf16NextWord(const char16_t* ptr, Index size,
- Index position, bool* is_space = nullptr);
-
-char16_t CRU_BASE_API ToLower(char16_t c);
-char16_t CRU_BASE_API ToUpper(char16_t c);
-
-bool CRU_BASE_API IsWhitespace(char16_t c);
-bool CRU_BASE_API IsDigit(char16_t c);
-
-Utf8CodePointIterator CRU_BASE_API CreateUtf8Iterator(const std::byte* buffer,
- Index size);
-Utf8CodePointIterator CRU_BASE_API
-CreateUtf8Iterator(const std::vector<std::byte>& buffer);
-
-CodePoint CRU_BASE_API Utf8NextCodePoint(std::string_view str, Index current,
- Index* next_position);
-CodePoint CRU_BASE_API Utf8PreviousCodePoint(std::string_view str,
- Index current,
- Index* next_position);
-// Return position after the character making predicate returns true or 0 if no
-// character doing so.
-Index CRU_BASE_API
-Utf8BackwardUntil(std::string_view str, Index position,
- const std::function<bool(CodePoint)>& predicate);
-// Return position before the character making predicate returns true or
-// str.size() if no character doing so.
-Index CRU_BASE_API
-Utf8ForwardUntil(std::string_view str, Index position,
- const std::function<bool(CodePoint)>& predicate);
-
-bool CRU_BASE_API Utf8IsValidInsertPosition(std::string_view str,
- Index position);
-
-Index CRU_BASE_API Utf8PreviousWord(std::string_view str, Index position,
- bool* is_space = nullptr);
-Index CRU_BASE_API Utf8NextWord(std::string_view str, Index position,
- bool* is_space = nullptr);
-
-} // namespace cru
+using Utf16CodePointIterator =
+ CodePointIterator<Utf16CodeUnit, &Utf16NextCodePoint>;
+} // namespace cru::string
diff --git a/src/base/StringUtil.cpp b/src/base/StringUtil.cpp
index 4e622dab..581ebcab 100644
--- a/src/base/StringUtil.cpp
+++ b/src/base/StringUtil.cpp
@@ -7,8 +7,7 @@
#include <compare>
#include <string_view>
-namespace cru {
-namespace string {
+namespace cru::string {
std::weak_ordering CaseInsensitiveCompare(std::string_view left,
std::string_view right) {
@@ -78,18 +77,58 @@ std::vector<std::string> Split(std::string_view str, std::string_view sep,
return result;
}
-} // namespace string
+
+namespace {
+
+// Shared scan loop behind the Utf8/Utf16 BackwardUntil and ForwardUntil
+// functions. NextCodePointFunction decodes one code point starting from the
+// position it is given and stores the position it stepped to, so the same
+// loop serves both directions.
+//
+// Returns the position the scan was at when it met a code point accepted by
+// predicate, or when the stepping function reported k_invalid_code_point
+// (nothing left to read in its direction). A position outside [0, size] is
+// returned unchanged.
+template <typename CharType,
+ NextCodePointFunctionType<CharType> NextCodePointFunction>
+Index Until(const CharType* ptr, Index size, Index position,
+ const std::function<bool(CodePoint)>& predicate) {
+ // The guard must not depend on the direction: "position <= 0" is the end of
+ // the text only when walking backward and made every forward scan that
+ // starts at 0 return immediately.
+ if (position < 0 || position > size) return position;
+ while (true) {
+ const Index start = position;
+ const CodePoint c = NextCodePointFunction(ptr, size, start, &position);
+ // The invalid marker is not a character, so it is never shown to predicate.
+ if (c == k_invalid_code_point || predicate(c)) return start;
+ }
+ UnreachableCode();
+}
+
+// True for U+0020 (space) and U+000A (line feed); no other code point counts
+// as a word separator yet.
+static bool IsSpace(CodePoint c) { return c == 0x20 || c == 0xA; }
+
+// Signature shared by the public BackwardUntil / ForwardUntil functions.
+template <typename CharType>
+using UntilFunctionType = Index (*)(const CharType*, Index, Index,
+ const std::function<bool(CodePoint)>&);
+
+// Shared implementation of the PreviousWord / NextWord functions.
+// Looks at the code point adjacent to position in the direction of
+// NextCodePointFunction. When it is a separator, the run of separators is
+// skipped; otherwise the run of non-separators is skipped. is_space, when not
+// null, receives which of the two cases applied. If there is no adjacent code
+// point, position is returned and *is_space is left untouched.
+template <typename CharType,
+ NextCodePointFunctionType<CharType> NextCodePointFunction,
+ UntilFunctionType<CharType> UntilFunction>
+Index Word(const CharType* ptr, Index size, Index position, bool* is_space) {
+ // Direction-neutral range check; see Until for why "position <= 0" is wrong
+ // for the forward variants.
+ if (position < 0 || position > size) return position;
+ const CodePoint c = NextCodePointFunction(ptr, size, position, nullptr);
+ // Already at the end of the text in the direction of travel.
+ if (c == k_invalid_code_point) return position;
+
+ // TODO: Only 0x20 (space) and 0xA (line feed) are treated as separators.
+ const bool starts_on_space = IsSpace(c);
+ if (is_space) *is_space = starts_on_space;
+ if (starts_on_space) {
+ return UntilFunction(ptr, size, position,
+ [](CodePoint c) { return !IsSpace(c); });
+ }
+ return UntilFunction(ptr, size, position,
+ [](CodePoint c) { return IsSpace(c); });
+}
+
+} // namespace
using details::ExtractBits;
-CodePoint Utf8NextCodePoint(const char* ptr, Index size, Index current,
+CodePoint Utf8NextCodePoint(const Utf8CodeUnit* ptr, Index size, Index current,
Index* next_position) {
CodePoint result;
if (current >= size) {
result = k_invalid_code_point;
} else {
- const auto cu0 = static_cast<std::uint8_t>(ptr[current++]);
+ const auto cu0 = static_cast<Utf8CodeUnit>(ptr[current++]);
auto read_next_folowing_code = [ptr, size, &current]() -> CodePoint {
if (current == size)
@@ -97,14 +136,14 @@ CodePoint Utf8NextCodePoint(const char* ptr, Index size, Index current,
"Unexpected end when read continuing byte of multi-byte code "
"point.");
- const auto u = static_cast<std::uint8_t>(ptr[current]);
+ const auto u = static_cast<Utf8CodeUnit>(ptr[current]);
if (!(u & (1u << 7)) || (u & (1u << 6))) {
throw TextEncodeException(
"Unexpected bad-format (not 0b10xxxxxx) continuing byte of "
"multi-byte code point.");
}
- return ExtractBits<std::uint8_t, 6, CodePoint>(ptr[current++]);
+ return ExtractBits<Utf8CodeUnit, 6, CodePoint>(ptr[current++]);
};
if ((1u << 7) & cu0) {
@@ -117,21 +156,21 @@ CodePoint Utf8NextCodePoint(const char* ptr, Index size, Index current,
"code point.");
}
- const CodePoint s0 = ExtractBits<std::uint8_t, 3, CodePoint>(cu0)
+ const CodePoint s0 = ExtractBits<Utf8CodeUnit, 3, CodePoint>(cu0)
<< (6 * 3);
const CodePoint s1 = read_next_folowing_code() << (6 * 2);
const CodePoint s2 = read_next_folowing_code() << 6;
const CodePoint s3 = read_next_folowing_code();
result = s0 + s1 + s2 + s3;
} else { // 3-length code point
- const CodePoint s0 = ExtractBits<std::uint8_t, 4, CodePoint>(cu0)
+ const CodePoint s0 = ExtractBits<Utf8CodeUnit, 4, CodePoint>(cu0)
<< (6 * 2);
const CodePoint s1 = read_next_folowing_code() << 6;
const CodePoint s2 = read_next_folowing_code();
result = s0 + s1 + s2;
}
} else { // 2-length code point
- const CodePoint s0 = ExtractBits<std::uint8_t, 5, CodePoint>(cu0)
+ const CodePoint s0 = ExtractBits<Utf8CodeUnit, 5, CodePoint>(cu0)
<< 6;
const CodePoint s1 = read_next_folowing_code();
result = s0 + s1;
@@ -149,8 +188,67 @@ CodePoint Utf8NextCodePoint(const char* ptr, Index size, Index current,
return result;
}
-CodePoint Utf16NextCodePoint(const char16_t* ptr, Index size, Index current,
- Index* next_position) {
+// Decodes the code point that ends immediately before `current`.
+//
+// Returns k_invalid_code_point and leaves the position unchanged when
+// current <= 0. Otherwise walks back over at most four bytes to find the
+// leading byte, decodes from there with Utf8NextCodePoint, and stores the
+// index of that leading byte in *previous_position (when not null).
+//
+// Throws TextEncodeException when no leading byte is found within the four
+// preceding bytes or before the start of the buffer; Utf8NextCodePoint is
+// called for the decoding, so whatever it throws propagates as well.
+CodePoint Utf8PreviousCodePoint(const Utf8CodeUnit* ptr, Index size,
+ Index current, Index* previous_position) {
+ CodePoint result;
+ if (current <= 0) {
+ result = k_invalid_code_point;
+ } else {
+ Index lead = current - 1;
+ int examined = 1;
+ while (!IsUtf8LeadingByte(ptr[lead])) {
+ // Stop at the start of the buffer as well as after four bytes, so text
+ // that begins with continuation bytes is never read at ptr[-1].
+ if (lead == 0 || examined == 4) {
+ throw TextEncodeException(
+ "Failed to find UTF-8 leading byte in 4 previous bytes.");
+ }
+ --lead;
+ ++examined;
+ }
+ current = lead;
+
+ result = Utf8NextCodePoint(ptr, size, current, nullptr);
+ }
+
+ if (previous_position != nullptr) *previous_position = current;
+ return result;
+}
+
+bool Utf8IsValidInsertPosition(const Utf8CodeUnit* ptr, Index size,
+ Index position) {  // True at 0, at size, or at an in-range index whose byte is not a 0b10xxxxxx continuation byte.
+ return position == 0 || position == size ||
+ (position > 0 && position < size && IsUtf8LeadingByte(ptr[position]));  // range check comes first, so ptr is only read in range
+}
+
+Index Utf8BackwardUntil(const Utf8CodeUnit* ptr, Index size, Index position,
+ const std::function<bool(CodePoint)>& predicate) {  // Thin wrapper: runs the shared Until loop, stepping with Utf8PreviousCodePoint.
+ return Until<Utf8CodeUnit, Utf8PreviousCodePoint>(ptr, size, position,
+ predicate);
+}
+
+Index Utf8ForwardUntil(const Utf8CodeUnit* ptr, Index size, Index position,
+ const std::function<bool(CodePoint)>& predicate) {  // Thin wrapper: runs the shared Until loop, stepping with Utf8NextCodePoint.
+ return Until<Utf8CodeUnit, Utf8NextCodePoint>(ptr, size, position, predicate);
+}
+
+static bool IsSpace(CodePoint c) { return c == 0x20 || c == 0xA; }  // NOTE(review): same body as the IsSpace in the anonymous namespace above; this second definition looks unused and would make later unqualified calls ambiguous - confirm and drop.
+
+Index Utf8PreviousWord(const Utf8CodeUnit* ptr, Index size, Index position,
+ bool* is_space) {  // Thin wrapper: shared Word helper with the backward UTF-8 stepping and scanning functions.
+ return Word<Utf8CodeUnit, Utf8PreviousCodePoint, Utf8BackwardUntil>(
+ ptr, size, position, is_space);
+}
+
+Index Utf8NextWord(const Utf8CodeUnit* ptr, Index size, Index position,
+ bool* is_space) {  // Thin wrapper: shared Word helper with the forward UTF-8 stepping and scanning functions.
+ return Word<Utf8CodeUnit, Utf8NextCodePoint, Utf8ForwardUntil>(
+ ptr, size, position, is_space);
+}
+
+CodePoint Utf16NextCodePoint(const Utf16CodeUnit* ptr, Index size,
+ Index current, Index* next_position) {
CodePoint result;
if (current >= size) {
@@ -172,8 +270,8 @@ CodePoint Utf16NextCodePoint(const char16_t* ptr, Index size, Index current,
"Unexpected bad-range second code unit of surrogate pair.");
}
- const auto s0 = ExtractBits<std::uint16_t, 10, CodePoint>(cu0) << 10;
- const auto s1 = ExtractBits<std::uint16_t, 10, CodePoint>(cu1);
+ const auto s0 = ExtractBits<Utf16CodeUnit, 10, CodePoint>(cu0) << 10;
+ const auto s1 = ExtractBits<Utf16CodeUnit, 10, CodePoint>(cu1);
result = s0 + s1 + 0x10000;
@@ -187,8 +285,8 @@ CodePoint Utf16NextCodePoint(const char16_t* ptr, Index size, Index current,
return result;
}
-CodePoint Utf16PreviousCodePoint(const char16_t* ptr, Index size, Index current,
- Index* previous_position) {
+CodePoint Utf16PreviousCodePoint(const Utf16CodeUnit* ptr, Index size,
+ Index current, Index* previous_position) {
CRU_UNUSED(size)
CodePoint result;
@@ -211,8 +309,8 @@ CodePoint Utf16PreviousCodePoint(const char16_t* ptr, Index size, Index current,
"Unexpected bad-range first code unit of surrogate pair.");
}
- const auto s0 = ExtractBits<std::uint16_t, 10, CodePoint>(cu1) << 10;
- const auto s1 = ExtractBits<std::uint16_t, 10, CodePoint>(cu0);
+ const auto s0 = ExtractBits<Utf16CodeUnit, 10, CodePoint>(cu1) << 10;
+ const auto s1 = ExtractBits<Utf16CodeUnit, 10, CodePoint>(cu0);
result = s0 + s1 + 0x10000;
@@ -226,7 +324,7 @@ CodePoint Utf16PreviousCodePoint(const char16_t* ptr, Index size, Index current,
return result;
}
-bool Utf16IsValidInsertPosition(const char16_t* ptr, Index size,
+bool Utf16IsValidInsertPosition(const Utf16CodeUnit* ptr, Index size,
Index position) {
if (position < 0) return false;
if (position > size) return false;
@@ -235,124 +333,28 @@ bool Utf16IsValidInsertPosition(const char16_t* ptr, Index size,
return !IsUtf16SurrogatePairTrailing(ptr[position]);
}
-Index Utf16BackwardUntil(const char16_t* ptr, Index size, Index position,
+Index Utf16BackwardUntil(const Utf16CodeUnit* ptr, Index size, Index position,
const std::function<bool(CodePoint)>& predicate) {
- if (position <= 0) return position;
- while (true) {
- Index p = position;
- auto c = Utf16PreviousCodePoint(ptr, size, p, &position);
- if (predicate(c)) return p;
- if (c == k_invalid_code_point) return p;
- }
- UnreachableCode();
+ return Until<Utf16CodeUnit, Utf16PreviousCodePoint>(ptr, size, position,
+ predicate);
}
-Index Utf16ForwardUntil(const char16_t* ptr, Index size, Index position,
+Index Utf16ForwardUntil(const Utf16CodeUnit* ptr, Index size, Index position,
const std::function<bool(CodePoint)>& predicate) {
- if (position >= size) return position;
- while (true) {
- Index p = position;
- auto c = Utf16NextCodePoint(ptr, size, p, &position);
- if (predicate(c)) return p;
- if (c == k_invalid_code_point) return p;
- }
- UnreachableCode();
+ return Until<Utf16CodeUnit, Utf16NextCodePoint>(ptr, size, position,
+ predicate);
}
-inline bool IsSpace(CodePoint c) { return c == 0x20 || c == 0xA; }
-
-Index Utf16PreviousWord(const char16_t* ptr, Index size, Index position,
+Index Utf16PreviousWord(const Utf16CodeUnit* ptr, Index size, Index position,
bool* is_space) {
- if (position <= 0) return position;
- auto c = Utf16PreviousCodePoint(ptr, size, position, nullptr);
- if (IsSpace(c)) { // TODO: Currently only test against 0x20(space).
- if (is_space) *is_space = true;
- return Utf16BackwardUntil(ptr, size, position,
- [](CodePoint c) { return !IsSpace(c); });
- } else {
- if (is_space) *is_space = false;
- return Utf16BackwardUntil(ptr, size, position, IsSpace);
- }
+ return Word<Utf16CodeUnit, Utf16PreviousCodePoint, Utf16BackwardUntil>(
+ ptr, size, position, is_space);
}
-Index Utf16NextWord(const char16_t* ptr, Index size, Index position,
+Index Utf16NextWord(const Utf16CodeUnit* ptr, Index size, Index position,
bool* is_space) {
- if (position >= size) return position;
- auto c = Utf16NextCodePoint(ptr, size, position, nullptr);
- if (IsSpace(c)) { // TODO: Currently only test against 0x20(space).
- if (is_space) *is_space = true;
- return Utf16ForwardUntil(ptr, size, position,
- [](CodePoint c) { return !IsSpace(c); });
- } else {
- if (is_space) *is_space = false;
- return Utf16ForwardUntil(ptr, size, position, IsSpace);
- }
-}
-
-char16_t ToLower(char16_t c) {
- if (c >= u'A' && c <= u'Z') {
- return c - u'A' + u'a';
- }
- return c;
-}
-
-char16_t ToUpper(char16_t c) {
- if (c >= u'a' && c <= u'z') {
- return c - u'a' + u'A';
- }
- return c;
-}
-
-bool IsWhitespace(char16_t c) {
- return c == u' ' || c == u'\t' || c == u'\n' || c == u'\r';
-}
-
-bool IsDigit(char16_t c) { return c >= u'0' && c <= u'9'; }
-
-Utf8CodePointIterator CreateUtf8Iterator(const std::byte* buffer, Index size) {
- return Utf8CodePointIterator(reinterpret_cast<const char*>(buffer), size);
-}
-
-Utf8CodePointIterator CreateUtf8Iterator(const std::vector<std::byte>& buffer) {
- return CreateUtf8Iterator(buffer.data(), buffer.size());
-}
-
-CodePoint Utf8NextCodePoint(std::string_view str, Index current,
- Index* next_position) {
- NotImplemented();
-}
-
-CodePoint Utf8PreviousCodePoint(std::string_view str, Index current,
- Index* next_position) {
- NotImplemented();
-}
-
-// Return position after the character making predicate returns true or 0 if no
-// character doing so.
-Index Utf8BackwardUntil(std::string_view str, Index position,
- const std::function<bool(CodePoint)>& predicate) {
- NotImplemented();
-}
-
-// Return position before the character making predicate returns true or
-// str.size() if no character doing so.
-Index Utf8ForwardUntil(std::string_view str, Index position,
- const std::function<bool(CodePoint)>& predicate) {
- NotImplemented();
-}
-
-bool Utf8IsValidInsertPosition(std::string_view str, Index position) {
- NotImplemented();
-}
-
-Index Utf8PreviousWord(std::string_view str, Index position,
- bool* is_space) {
- NotImplemented();
-}
-
-Index Utf8NextWord(std::string_view str, Index position,
- bool* is_space) {
- NotImplemented();
+ return Word<Utf16CodeUnit, Utf16NextCodePoint, Utf16ForwardUntil>(
+ ptr, size, position, is_space);
}
-} // namespace cru
+} // namespace cru::string
diff --git a/src/ui/controls/TextHostControlService.cpp b/src/ui/controls/TextHostControlService.cpp
index bb723e3f..3c38c454 100644
--- a/src/ui/controls/TextHostControlService.cpp
+++ b/src/ui/controls/TextHostControlService.cpp
@@ -5,8 +5,6 @@
#include "cru/base/Base.h"
#include "cru/base/StringUtil.h"
#include "cru/base/log/Logger.h"
-#include "cru/platform/graphics/Font.h"
-#include "cru/platform/gui/Base.h"
#include "cru/platform/gui/Clipboard.h"
#include "cru/platform/gui/Cursor.h"
#include "cru/platform/gui/InputMethod.h"
@@ -16,7 +14,6 @@
#include "cru/ui/DebugFlags.h"
#include "cru/ui/DeleteLater.h"
#include "cru/ui/components/Menu.h"
-#include "cru/ui/events/UiEvents.h"
#include "cru/ui/helper/ShortcutHub.h"
#include "cru/ui/host/WindowHost.h"
#include "cru/ui/render/ScrollRenderObject.h"
@@ -25,12 +22,15 @@
#include <memory>
namespace cru::ui::controls {
+using namespace cru::string;
+
TextControlMovePattern TextControlMovePattern::kLeft(
"Left", helper::ShortcutKeyBind(platform::gui::KeyCode::Left),
[](TextHostControlService* service, std::string_view text,
Index current_position) {
CRU_UNUSED(service)
- Utf8PreviousCodePoint(text, current_position, &current_position);
+ Utf8PreviousCodePoint(text.data(), text.size(), current_position,
+ &current_position);
return current_position;
});
TextControlMovePattern TextControlMovePattern::kRight(
@@ -38,7 +38,8 @@ TextControlMovePattern TextControlMovePattern::kRight(
[](TextHostControlService* service, std::string_view text,
Index current_position) {
CRU_UNUSED(service)
- Utf8NextCodePoint(text, current_position, &current_position);
+ Utf8NextCodePoint(text.data(), text.size(), current_position,
+ &current_position);
return current_position;
});
TextControlMovePattern TextControlMovePattern::kCtrlLeft(
@@ -48,7 +49,7 @@ TextControlMovePattern TextControlMovePattern::kCtrlLeft(
[](TextHostControlService* service, std::string_view text,
Index current_position) {
CRU_UNUSED(service)
- return Utf8PreviousWord(text, current_position);
+ return Utf8PreviousWord(text.data(), text.size(), current_position);
});
TextControlMovePattern TextControlMovePattern::kCtrlRight(
"Ctrl+Right(Next Word)",
@@ -57,7 +58,7 @@ TextControlMovePattern TextControlMovePattern::kCtrlRight(
[](TextHostControlService* service, std::string_view text,
Index current_position) {
CRU_UNUSED(service)
- return Utf8NextWord(text, current_position);
+ return Utf8NextWord(text.data(), text.size(), current_position);
});
TextControlMovePattern TextControlMovePattern::kUp(
"Up", helper::ShortcutKeyBind(platform::gui::KeyCode::Up),
@@ -86,7 +87,7 @@ TextControlMovePattern TextControlMovePattern::kHome(
[](TextHostControlService* service, std::string_view text,
Index current_position) {
CRU_UNUSED(service)
- return Utf8BackwardUntil(text, current_position,
+ return Utf8BackwardUntil(text.data(), text.size(), current_position,
[](CodePoint c) { return c == u'\n'; });
});
TextControlMovePattern TextControlMovePattern::kEnd(
@@ -94,7 +95,7 @@ TextControlMovePattern TextControlMovePattern::kEnd(
[](TextHostControlService* service, std::string_view text,
Index current_position) {
CRU_UNUSED(service)
- return Utf8ForwardUntil(text, current_position,
+ return Utf8ForwardUntil(text.data(), text.size(), current_position,
[](CodePoint c) { return c == u'\n'; });
});
TextControlMovePattern TextControlMovePattern::kCtrlHome(
@@ -225,7 +226,8 @@ void TextHostControlService::SetText(std::string text, bool stop_composition) {
void TextHostControlService::InsertText(Index position, std::string_view text,
bool stop_composition) {
- if (!Utf8IsValidInsertPosition(this->text_, position)) {
+ if (!Utf8IsValidInsertPosition(this->text_.data(), this->text_.size(),
+ position)) {
CRU_LOG_TAG_ERROR("Invalid text insert position.");
return;
}
@@ -239,26 +241,29 @@ void TextHostControlService::InsertText(Index position, std::string_view text,
}
void TextHostControlService::DeleteChar(Index position, bool stop_composition) {
- if (!Utf8IsValidInsertPosition(this->text_, position)) {
+ if (!Utf8IsValidInsertPosition(this->text_.data(), this->text_.size(),
+ position)) {
CRU_LOG_TAG_ERROR("Invalid text delete position.");
return;
}
if (position == static_cast<Index>(this->text_.size())) return;
Index next;
- Utf8NextCodePoint(this->text_, position, &next);
+ Utf8NextCodePoint(this->text_.data(), this->text_.size(), position, &next);
this->DeleteText(TextRange::FromTwoSides(position, next), stop_composition);
}
// Return the position of deleted character.
Index TextHostControlService::DeleteCharPrevious(Index position,
bool stop_composition) {
- if (!Utf8IsValidInsertPosition(this->text_, position)) {
+ if (!Utf8IsValidInsertPosition(this->text_.data(), this->text_.size(),
+ position)) {
CRU_LOG_TAG_ERROR("Invalid text delete position.");
return 0;
}
if (position == 0) return 0;
Index previous;
- Utf8PreviousCodePoint(this->text_, position, &previous);
+ Utf8PreviousCodePoint(this->text_.data(), this->text_.size(), position,
+ &previous);
this->DeleteText(TextRange::FromTwoSides(previous, position),
stop_composition);
return previous;
@@ -268,11 +273,13 @@ void TextHostControlService::DeleteText(TextRange range,
bool stop_composition) {
if (range.count == 0) return;
range = range.Normalize();
- if (!Utf8IsValidInsertPosition(this->text_, range.GetStart())) {
+ if (!Utf8IsValidInsertPosition(this->text_.data(), this->text_.size(),
+ range.GetStart())) {
CRU_LOG_TAG_ERROR("Invalid text delete start position.");
return;
}
- if (!Utf8IsValidInsertPosition(this->text_, range.GetStart())) {
+ if (!Utf8IsValidInsertPosition(this->text_.data(), this->text_.size(),
+ range.GetStart())) {
CRU_LOG_TAG_ERROR("Invalid text delete end position.");
return;
}
diff --git a/test/base/StringUtilTest.cpp b/test/base/StringUtilTest.cpp
index 2b12780c..32fd0d88 100644
--- a/test/base/StringUtilTest.cpp
+++ b/test/base/StringUtilTest.cpp
@@ -3,18 +3,31 @@
#include <catch2/catch_test_macros.hpp>
using cru::Index;
-using cru::k_invalid_code_point;
+using namespace cru::string;
TEST_CASE("StringUtil Split", "[string]") {
- using cru::string::Split;
REQUIRE(Split("abc", "b") == std::vector<std::string>{"a", "c"});
REQUIRE(Split("abcd", "bc") == std::vector<std::string>{"a", "d"});
REQUIRE(Split("abcdbcd", "bc") == std::vector<std::string>{"a", "d", "d"});
REQUIRE(Split("aaa", "a") == std::vector<std::string>{"", "", "", ""});
}
+TEST_CASE("StringUtil Utf8ByteType", "[string]") {  // Checks both byte predicates on one sample of each top-bit pattern.
+ REQUIRE(IsUtf8LeadingByte(0b00100000));  // 0xxxxxxx
+ REQUIRE(IsUtf8LeadingByte(0b01000000));  // 0xxxxxxx
+ REQUIRE(IsUtf8LeadingByte(0b11000000));  // 110xxxxx
+ REQUIRE(IsUtf8LeadingByte(0b11100000));  // 1110xxxx
+ REQUIRE(IsUtf8LeadingByte(0b11110000));  // 11110xxx
+ REQUIRE(!IsUtf8LeadingByte(0b10100000));  // 10xxxxxx is the only pattern rejected
+ REQUIRE(!IsUtf8FollowingByte(0b00100000));
+ REQUIRE(!IsUtf8FollowingByte(0b01000000));
+ REQUIRE(!IsUtf8FollowingByte(0b11000000));
+ REQUIRE(!IsUtf8FollowingByte(0b11100000));
+ REQUIRE(!IsUtf8FollowingByte(0b11110000));
+ REQUIRE(IsUtf8FollowingByte(0b10100000));  // 10xxxxxx is the only pattern accepted
+}
+
TEST_CASE("StringUtil Utf8NextCodePoint", "[string]") {
- using cru::Utf8NextCodePoint;
std::string_view text = "aπ你🤣!";
Index current = 0;
REQUIRE(Utf8NextCodePoint(text.data(), text.size(), current, &current) ==
@@ -32,8 +45,25 @@ TEST_CASE("StringUtil Utf8NextCodePoint", "[string]") {
REQUIRE(current == static_cast<Index>(text.size()));
}
+TEST_CASE("StringUtil Utf8PreviousCodePoint", "[string]") {  // Walks the sample text backward from its end, one code point per call.
+ std::string_view text = "aπ你🤣!";
+ Index current = text.size();
+ REQUIRE(Utf8PreviousCodePoint(text.data(), text.size(), current, &current) ==
+ 0x0021);  // '!'
+ REQUIRE(Utf8PreviousCodePoint(text.data(), text.size(), current, &current) ==
+ 0x1F923);  // 4-byte sequence
+ REQUIRE(Utf8PreviousCodePoint(text.data(), text.size(), current, &current) ==
+ 0x4F60);  // 3-byte sequence
+ REQUIRE(Utf8PreviousCodePoint(text.data(), text.size(), current, &current) ==
+ 0x03C0);  // 2-byte sequence
+ REQUIRE(Utf8PreviousCodePoint(text.data(), text.size(), current, &current) ==
+ 0x0061);  // 'a'
+ REQUIRE(Utf8PreviousCodePoint(text.data(), text.size(), current, &current) ==
+ k_invalid_code_point);  // nothing precedes position 0
+ REQUIRE(current == 0);  // the position stays at the start
+}
+
TEST_CASE("StringUtil Utf16NextCodePoint", "[string]") {
- using cru::Utf16NextCodePoint;
std::u16string_view text = u"aπ你🤣!";
Index current = 0;
REQUIRE(Utf16NextCodePoint(text.data(), text.size(), current, &current) ==
@@ -52,7 +82,6 @@ TEST_CASE("StringUtil Utf16NextCodePoint", "[string]") {
}
TEST_CASE("StringUtil Utf16PreviousCodePoint", "[string]") {
- using cru::Utf16PreviousCodePoint;
std::u16string_view text = u"aπ你🤣!";
Index current = text.size();
REQUIRE(Utf16PreviousCodePoint(text.data(), text.size(), current, &current) ==
@@ -71,38 +100,34 @@ TEST_CASE("StringUtil Utf16PreviousCodePoint", "[string]") {
}
TEST_CASE("StringUtil Utf8CodePointIterator", "[string]") {
- using cru::Utf8CodePointIterator;
std::string_view text = "aπ你🤣!";
- std::vector<cru::CodePoint> code_points;
+ std::vector<CodePoint> code_points;
for (auto cp : Utf8CodePointIterator(text.data(), text.size())) {
code_points.push_back(cp);
}
- std::vector<cru::CodePoint> expected_code_points{0x0061, 0x03C0, 0x4F60,
- 0x1F923, 0x0021};
+ std::vector<CodePoint> expected_code_points{0x0061, 0x03C0, 0x4F60, 0x1F923,
+ 0x0021};
REQUIRE(code_points == expected_code_points);
}
TEST_CASE("StringUtil Utf16CodePointIterator", "[string]") {
- using cru::Utf16CodePointIterator;
std::u16string_view text = u"aπ你🤣!";
- std::vector<cru::CodePoint> code_points;
+ std::vector<CodePoint> code_points;
for (auto cp : Utf16CodePointIterator(text.data(), text.size())) {
code_points.push_back(cp);
}
- std::vector<cru::CodePoint> expected_code_points{0x0061, 0x03C0, 0x4F60,
- 0x1F923, 0x0021};
+ std::vector<CodePoint> expected_code_points{0x0061, 0x03C0, 0x4F60, 0x1F923,
+ 0x0021};
REQUIRE(code_points == expected_code_points);
}
TEST_CASE("ParseToNumber Work", "[string]") {
- using namespace cru::string;
-
auto r1 = ParseToNumber<int>("123");
REQUIRE(r1.valid);
REQUIRE(r1.value == 123);
@@ -121,8 +146,6 @@ TEST_CASE("ParseToNumber Work", "[string]") {
}
TEST_CASE("ParseToNumber AllowLeadingZeroFlag", "[string]") {
- using namespace cru::string;
-
auto r1 = ParseToNumber<int>(" 123");
REQUIRE(!r1.valid);
@@ -142,8 +165,6 @@ TEST_CASE("ParseToNumber AllowLeadingZeroFlag", "[string]") {
}
TEST_CASE("StringToIntegerConverterImpl AllowTrailingSpacesFlag", "[string]") {
- using namespace cru::string;
-
auto r1 = ParseToNumber<int>("123 ");
REQUIRE(!r1.valid);
@@ -164,8 +185,6 @@ TEST_CASE("StringToIntegerConverterImpl AllowTrailingSpacesFlag", "[string]") {
}
TEST_CASE("StringToIntegerConverterImpl AllowTrailingJunk", "[string]") {
- using namespace cru::string;
-
auto r1 = ParseToNumber<int>("123ab");
REQUIRE(!r1.valid);
@@ -185,8 +204,6 @@ TEST_CASE("StringToIntegerConverterImpl AllowTrailingJunk", "[string]") {
}
TEST_CASE("StringToIntegerConverterImpl CompositeFlags", "[string]") {
- using namespace cru::string;
-
auto r1 =
ParseToNumber<int>(" 123ab", ParseToNumberFlags::AllowLeadingSpaces |
ParseToNumberFlags::AllowTrailingJunk);
@@ -203,8 +220,6 @@ TEST_CASE("StringToIntegerConverterImpl CompositeFlags", "[string]") {
}
TEST_CASE("String ParseToNumberList", "[string]") {
- using namespace cru::string;
-
auto r1 = ParseToNumberList<int>("123 456 789");
REQUIRE(r1 == std::vector<int>{123, 456, 789});