diff options
Diffstat (limited to 'include/cru/common/StringUtil.hpp')
-rw-r--r-- | include/cru/common/StringUtil.hpp | 115 |
1 files changed, 89 insertions, 26 deletions
diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp
index 714f1d49..b8edc302 100644
--- a/include/cru/common/StringUtil.hpp
+++ b/include/cru/common/StringUtil.hpp
@@ -10,51 +10,114 @@ class TextEncodeException : public std::runtime_error {
   using runtime_error::runtime_error;
 };
 
-inline bool IsSurrogatePair(char16_t c) { return c >= 0xD800 && c <= 0xDFFF; }
+inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) {
+  return c >= 0xD800 && c <= 0xDFFF;
+}
 
-inline bool IsSurrogatePairLeading(char16_t c) {
+inline bool IsUtf16SurrogatePairLeading(char16_t c) {
   return c >= 0xD800 && c <= 0xDBFF;
 }
 
-inline bool IsSurrogatePairTrailing(char16_t c) {
+inline bool IsUtf16SurrogatePairTrailing(char16_t c) {
   return c >= 0xDC00 && c <= 0xDFFF;
 }
 
-class Utf16Iterator : public Object {
+CodePoint Utf8NextCodePoint(std::string_view str, Index current,
+                            Index* next_position);
+
+CodePoint Utf16NextCodePoint(std::u16string_view str, Index current,
+                             Index* next_position);
+CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current,
+                                 Index* previous_position);
+
+template <typename StringType>
+using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*);
+
+template <typename StringType,
+          NextCodePointFunctionType<StringType> NextCodePointFunction>
+class CodePointIterator {  // Forward iterator over a string's code points.
+ public:
+  using difference_type = Index;
+  using value_type = CodePoint;
+  using pointer = void;
+  using reference = value_type;
+  using iterator_category = std::forward_iterator_tag;
+
  public:
-  explicit Utf16Iterator(std::u16string_view string)
-      : string_(std::move(string)) {}
-  Utf16Iterator(std::u16string_view string, Index position)
-      : string_(std::move(string)), position_(position) {}
+  struct past_end_tag_t {};  // Selects the past-the-end constructor.
 
-  CRU_DEFAULT_COPY(Utf16Iterator)
-  CRU_DEFAULT_MOVE(Utf16Iterator)
+  explicit CodePointIterator(StringType string)
+      : string_(std::move(string)), position_(0) {}
+  explicit CodePointIterator(StringType string, past_end_tag_t)
+      : string_(std::move(string)), position_(string_.size()) {}
 
-  ~Utf16Iterator() = default;
+  CRU_DEFAULT_COPY(CodePointIterator)
+  CRU_DEFAULT_MOVE(CodePointIterator)
+
+  ~CodePointIterator() = default;
 
  public:
-  void SetPositionToHead() { position_ = 0; }
-  void SetPosition(Index position) { position_ = position; }
+  StringType GetString() const { return string_; }
+  Index GetPosition() const { return position_; }
+
+  bool IsPastEnd() const {
+    return position_ == static_cast<Index>(string_.size());
+  }
 
-  // Backward current position and get previous code point. Return
-  // k_invalid_code_point if reach head. Throw TextEncodeException if encounter
-  // encoding problem.
-  CodePoint Previous();
+ public:
+  CodePointIterator begin() const { return *this; }
+  CodePointIterator end() const {
+    return CodePointIterator{string_, past_end_tag_t{}};
+  }
 
-  // Advance current position and get next code point. Return
-  // k_invalid_code_point if reach tail. Throw TextEncodeException if encounter
-  // encoding problem.
-  CodePoint Next();
+ public:
+  bool operator==(const CodePointIterator& other) const {
+    // Precondition: both iterators must view the same string.
+    Expects(this->string_.data() == other.string_.data() &&
+            this->string_.size() == other.string_.size());
+    return this->position_ == other.position_;
+  }
+  bool operator!=(const CodePointIterator& other) const {
+    return !this->operator==(other);
+  }
+
+  CodePointIterator& operator++() {
+    Expects(!IsPastEnd());
+    Forward();
+    return *this;
+  }
+
+  CodePointIterator operator++(int) {
+    Expects(!IsPastEnd());
+    CodePointIterator old = *this;
+    Forward();
+    return old;
+  }
+
+  CodePoint operator*() const {  // Also caches the next position.
+    return NextCodePointFunction(string_, position_, &next_position_cache_);
+  }
 
-  Index CurrentPosition() const { return this->position_; }
+ private:
+  void Forward() {
+    if (next_position_cache_ > position_) {  // Cache set by operator*.
+      position_ = next_position_cache_;
+    } else {
+      NextCodePointFunction(string_, position_, &position_);
+    }
+  }
 
  private:
-  std::u16string_view string_;
-  Index position_ = 0;
+  StringType string_;
+  Index position_;
+  mutable Index next_position_cache_ = 0;  // Valid only when > position_.
 };
 
-Index PreviousIndex(std::u16string_view string, Index current);
-Index NextIndex(std::u16string_view string, Index current);
+using Utf8CodePointIterator =
+    CodePointIterator<std::string_view, &Utf8NextCodePoint>;
+
+using Utf16CodePointIterator =
+    CodePointIterator<std::u16string_view, &Utf16NextCodePoint>;
 
 std::string ToUtf8(const std::u16string& s);
 inline std::string ToUtf8(std::u16string_view s) {