From 99e2e923d0c77b02f3fb4ff648ea916954868606 Mon Sep 17 00:00:00 2001 From: Yuqian Yang Date: Fri, 28 Feb 2025 23:13:39 +0800 Subject: chore(store): move everything to store. --- .../computer-network-experiment/StringUtil.hpp | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 store/works/life/computer-network-experiment/StringUtil.hpp (limited to 'store/works/life/computer-network-experiment/StringUtil.hpp') diff --git a/store/works/life/computer-network-experiment/StringUtil.hpp b/store/works/life/computer-network-experiment/StringUtil.hpp new file mode 100644 index 0000000..b0ca675 --- /dev/null +++ b/store/works/life/computer-network-experiment/StringUtil.hpp @@ -0,0 +1,158 @@ +#pragma once +#include "Base.hpp" + +#include +#include +#include + +namespace cru { +using CodePoint = std::int32_t; +constexpr CodePoint k_invalid_code_point = -1; + +class TextEncodeException : public std::runtime_error { + public: + using runtime_error::runtime_error; +}; + +inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) { + return c >= 0xD800 && c <= 0xDFFF; +} + +inline bool IsUtf16SurrogatePairLeading(char16_t c) { + return c >= 0xD800 && c <= 0xDBFF; +} + +inline bool IsUtf16SurrogatePairTrailing(char16_t c) { + return c >= 0xDC00 && c <= 0xDFFF; +} + +CodePoint Utf8NextCodePoint(std::string_view str, Index current, + Index* next_position); + +CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, + Index* next_position); +CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, + Index* previous_position); + +template +using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*); + +template NextCodePointFunction> +class CodePointIterator { + public: + using difference_type = Index; + using value_type = CodePoint; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; + + public: + struct past_end_tag_t {}; + + explicit CodePointIterator(StringType string) + : string_(std::move(string)), position_(0) {} + explicit CodePointIterator(StringType string, past_end_tag_t) + : string_(std::move(string)), position_(string_.size()) {} + + CRU_DEFAULT_COPY(CodePointIterator) + CRU_DEFAULT_MOVE(CodePointIterator) + + ~CodePointIterator() = default; + + public: + StringType GetString() const { return string_; } + Index GetPosition() const { return position_; } + + bool IsPastEnd() const { + return position_ == static_cast(string_.size()); + } + + public: + CodePointIterator begin() const { return *this; } + CodePointIterator end() const { + return CodePointIterator{string_, past_end_tag_t{}}; + } + + public: + bool operator==(const CodePointIterator& other) const { + // You should compare iterator that iterate on the same string. + Expects(this->string_.data() == other.string_.data() && + this->string_.size() == other.string_.size()); + return this->position_ == other.position_; + } + bool operator!=(const CodePointIterator& other) const { + return !this->operator==(other); + } + + CodePointIterator& operator++() { + Expects(!IsPastEnd()); + Forward(); + return *this; + } + + CodePointIterator operator++(int) { + Expects(!IsPastEnd()); + CodePointIterator old = *this; + Forward(); + return old; + } + + CodePoint operator*() const { + return NextCodePointFunction(string_, position_, &next_position_cache_); + } + + private: + void Forward() { + if (next_position_cache_ > position_) { + position_ = next_position_cache_; + } else { + NextCodePointFunction(string_, position_, &position_); + } + } + + private: + StringType string_; + Index position_; + mutable Index next_position_cache_; +}; + +using Utf8CodePointIterator = + CodePointIterator; + +using Utf16CodePointIterator = + CodePointIterator; + +void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str); +void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str); + +std::string ToUtf8(std::u16string_view s); +std::u16string ToUtf16(std::string_view s); + +#ifdef WIN32 +std::string ToUtf8(std::wstring_view s); +std::wstring ToUtf16WString(std::string_view s); +#endif + +// If given s is not a valid utf16 string, return value is UD. +bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position); + +// Return position after the character making predicate returns true or 0 if no +// character doing so. +gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate); +// Return position before the character making predicate returns true or +// str.size() if no character doing so. +gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate); + +gsl::index Utf16PreviousWord(std::u16string_view str, gsl::index position, + bool* is_space = nullptr); +gsl::index Utf16NextWord(std::u16string_view str, gsl::index position, + bool* is_space = nullptr); + +char16_t ToLower(char16_t c); +char16_t ToUpper(char16_t c); +std::u16string ToLower(std::u16string_view s); +std::u16string ToUpper(std::u16string_view s); +} // namespace cru -- cgit v1.2.3