From 6d8fecb163a9c813a1b533970997353d33b6bf5e Mon Sep 17 00:00:00 2001 From: crupest Date: Sun, 6 Jun 2021 18:37:08 +0800 Subject: import(life): ... --- .../computer-network-experiment/StringUtil.hpp | 157 +++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 works/life/computer-network-experiment/StringUtil.hpp (limited to 'works/life/computer-network-experiment/StringUtil.hpp') diff --git a/works/life/computer-network-experiment/StringUtil.hpp b/works/life/computer-network-experiment/StringUtil.hpp new file mode 100644 index 0000000..1a9634a --- /dev/null +++ b/works/life/computer-network-experiment/StringUtil.hpp @@ -0,0 +1,157 @@ +#pragma once +#include "Base.hpp" + +#include +#include +#include + +namespace cru { +using CodePoint = std::int32_t; +constexpr CodePoint k_invalid_code_point = -1; + +class TextEncodeException : public std::runtime_error { + public: + using runtime_error::runtime_error; +}; + +inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) { + return c >= 0xD800 && c <= 0xDFFF; +} + +inline bool IsUtf16SurrogatePairLeading(char16_t c) { + return c >= 0xD800 && c <= 0xDBFF; +} + +inline bool IsUtf16SurrogatePairTrailing(char16_t c) { + return c >= 0xDC00 && c <= 0xDFFF; +} + +CodePoint Utf8NextCodePoint(std::string_view str, Index current, + Index* next_position); + +CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, + Index* next_position); +CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, + Index* previous_position); + +template +using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*); + +template NextCodePointFunction> +class CodePointIterator { + public: + using difference_type = Index; + using value_type = CodePoint; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; + + public: + struct past_end_tag_t {}; + + explicit CodePointIterator(StringType string) + : string_(std::move(string)), position_(0) {} + explicit CodePointIterator(StringType string, past_end_tag_t) + : string_(std::move(string)), position_(string_.size()) {} + + CRU_DEFAULT_COPY(CodePointIterator) + CRU_DEFAULT_MOVE(CodePointIterator) + + ~CodePointIterator() = default; + + public: + StringType GetString() const { return string_; } + Index GetPosition() const { return position_; } + + bool IsPastEnd() const { + return position_ == static_cast(string_.size()); + } + + public: + CodePointIterator begin() const { return *this; } + CodePointIterator end() const { + return CodePointIterator{string_, past_end_tag_t{}}; + } + + public: + bool operator==(const CodePointIterator& other) const { + // You should compare iterator that iterate on the same string. + Expects(this->string_.data() == other.string_.data() && + this->string_.size() == other.string_.size()); + return this->position_ == other.position_; + } + bool operator!=(const CodePointIterator& other) const { + return !this->operator==(other); + } + + CodePointIterator& operator++() { + Expects(!IsPastEnd()); + Forward(); + return *this; + } + + CodePointIterator operator++(int) { + Expects(!IsPastEnd()); + CodePointIterator old = *this; + Forward(); + return old; + } + + CodePoint operator*() const { + return NextCodePointFunction(string_, position_, &next_position_cache_); + } + + private: + void Forward() { + if (next_position_cache_ > position_) { + position_ = next_position_cache_; + } else { + NextCodePointFunction(string_, position_, &position_); + } + } + + private: + StringType string_; + Index position_; + mutable Index next_position_cache_; +}; + +using Utf8CodePointIterator = + CodePointIterator; + +using Utf16CodePointIterator = + CodePointIterator; + +void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str); +void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str); + +std::string ToUtf8(std::u16string_view s); +std::u16string ToUtf16(std::string_view s); + +#ifdef WIN32 +std::wstring ToUtf16WString(std::string_view s); +#endif + +// If given s is not a valid utf16 string, return value is UD. +bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position); + +// Return position after the character making predicate returns true or 0 if no +// character doing so. +gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate); +// Return position before the character making predicate returns true or +// str.size() if no character doing so. +gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate); + +gsl::index Utf16PreviousWord(std::u16string_view str, gsl::index position, + bool* is_space = nullptr); +gsl::index Utf16NextWord(std::u16string_view str, gsl::index position, + bool* is_space = nullptr); + +char16_t ToLower(char16_t c); +char16_t ToUpper(char16_t c); +std::u16string ToLower(std::u16string_view s); +std::u16string ToUpper(std::u16string_view s); +} // namespace cru -- cgit v1.2.3 From 42f7fc1876cbe68569771b97a8935fbca7fa3ee4 Mon Sep 17 00:00:00 2001 From: crupest Date: Mon, 7 Jun 2021 20:42:28 +0800 Subject: import(life): ... --- works/life/computer-network-experiment/Common.h | 11 +- works/life/computer-network-experiment/Output.cpp | 4 +- .../computer-network-experiment/StringUtil.cpp | 120 +++++++++++++-------- .../computer-network-experiment/StringUtil.hpp | 1 + works/life/computer-network-experiment/client.cpp | 2 +- 5 files changed, 84 insertions(+), 54 deletions(-) (limited to 'works/life/computer-network-experiment/StringUtil.hpp') diff --git a/works/life/computer-network-experiment/Common.h b/works/life/computer-network-experiment/Common.h index c3b6094..6886e38 100644 --- a/works/life/computer-network-experiment/Common.h +++ b/works/life/computer-network-experiment/Common.h @@ -17,6 +17,10 @@ inline auto &error_stream = std::wcerr; inline String ConvertCharString(std::string_view s) { return cru::ToUtf16WString(s); } + +inline std::string ConvertCharStringBack(StringView s) { + return cru::ToUtf8(s); +} #else using Char = char; using String = std::string; @@ -27,6 +31,7 @@ inline auto &error_stream = std::cerr; #define CRUT(string_literal) string_literal inline String ConvertCharString(std::string_view s) { return String(s); } +inline std::string ConvertCharStringBack(StringView s) { return {s}; } #endif int Main(); @@ -34,10 +39,6 @@ int Main(); [[noreturn]] void PrintErrorMessageAndExit(StringView message, bool print_last_error = true); -#ifdef WIN32 -void InitWSA(); -#endif - int CloseSocket(int socket); void BeforeExit(); @@ -45,4 +46,4 @@ void BeforeExit(); String ReadInputLine(); void SafeSend(int socket, std::string_view buffer); -std::string SafeReadUntil(int socket, char c, std::string& rest); +std::string SafeReadUntil(int socket, char c, std::string &rest); diff --git a/works/life/computer-network-experiment/Output.cpp b/works/life/computer-network-experiment/Output.cpp index db97e5e..fbbd6ba 100644 --- a/works/life/computer-network-experiment/Output.cpp +++ b/works/life/computer-network-experiment/Output.cpp @@ -47,7 +47,7 @@ void PrintOutput(const Output &output) { void OutputThread() { while (true) { - m.lock(); + std::lock_guard guard(m); if (cancellation_source.getToken().isCancellationRequested()) { while (true) { @@ -63,8 +63,6 @@ void OutputThread() { Output output; if (output_queue.readIfNotEmpty(output)) PrintOutput(output); - - m.unlock(); } } diff --git a/works/life/computer-network-experiment/StringUtil.cpp b/works/life/computer-network-experiment/StringUtil.cpp index 1224bdc..6bf906d 100644 --- a/works/life/computer-network-experiment/StringUtil.cpp +++ b/works/life/computer-network-experiment/StringUtil.cpp @@ -5,14 +5,14 @@ namespace cru { namespace { template -inline std::enable_if_t, ReturnType> ExtractBits( - UInt n) { +inline std::enable_if_t, ReturnType> +ExtractBits(UInt n) { return static_cast(n & ((1u << number_of_bit) - 1)); } -} // namespace +} // namespace CodePoint Utf8NextCodePoint(std::string_view str, Index current, - Index* next_position) { + Index *next_position) { CodePoint result; if (current >= static_cast(str.length())) { @@ -37,9 +37,9 @@ CodePoint Utf8NextCodePoint(std::string_view str, Index current, }; if ((1u << 7) & cu0) { - if ((1u << 6) & cu0) { // 2~4-length code point - if ((1u << 5) & cu0) { // 3~4-length code point - if ((1u << 4) & cu0) { // 4-length code point + if ((1u << 6) & cu0) { // 2~4-length code point + if ((1u << 5) & cu0) { // 3~4-length code point + if ((1u << 4) & cu0) { // 4-length code point if (cu0 & (1u << 3)) { throw TextEncodeException( "Unexpected bad-format begin byte (not 0b11110xxx) of 4-byte" @@ -52,14 +52,14 @@ CodePoint Utf8NextCodePoint(std::string_view str, Index current, const CodePoint s2 = read_next_folowing_code() << 6; const CodePoint s3 = read_next_folowing_code(); result = s0 + s1 + s2 + s3; - } else { // 3-length code point + } else { // 3-length code point const CodePoint s0 = ExtractBits(cu0) << (6 * 2); const CodePoint s1 = read_next_folowing_code() << 6; const CodePoint s2 = read_next_folowing_code(); result = s0 + s1 + s2; } - } else { // 2-length code point + } else { // 2-length code point const CodePoint s0 = ExtractBits(cu0) << 6; const CodePoint s1 = read_next_folowing_code(); @@ -74,12 +74,13 @@ CodePoint Utf8NextCodePoint(std::string_view str, Index current, } } - if (next_position != nullptr) *next_position = current; + if (next_position != nullptr) + *next_position = current; return result; } CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, - Index* next_position) { + Index *next_position) { CodePoint result; if (current >= static_cast(str.length())) { @@ -87,9 +88,9 @@ CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, } else { const auto cu0 = str[current++]; - if (!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point + if (!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point result = static_cast(cu0); - } else if (IsUtf16SurrogatePairLeading(cu0)) { // 2-length code point + } else if (IsUtf16SurrogatePairLeading(cu0)) { // 2-length code point if (current >= static_cast(str.length())) { throw TextEncodeException( "Unexpected end when reading second code unit of surrogate pair."); @@ -112,21 +113,22 @@ CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, } } - if (next_position != nullptr) *next_position = current; + if (next_position != nullptr) + *next_position = current; return result; } CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, - Index* previous_position) { + Index *previous_position) { CodePoint result; if (current <= 0) { result = k_invalid_code_point; } else { const auto cu0 = str[--current]; - if (!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point + if (!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point result = static_cast(cu0); - } else if (IsUtf16SurrogatePairTrailing(cu0)) { // 2-length code point + } else if (IsUtf16SurrogatePairTrailing(cu0)) { // 2-length code point if (current <= 0) { throw TextEncodeException( "Unexpected end when reading first code unit of surrogate pair."); @@ -149,11 +151,12 @@ CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, } } - if (previous_position != nullptr) *previous_position = current; + if (previous_position != nullptr) + *previous_position = current; return result; } -void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str) { +void Utf8EncodeCodePointAppend(CodePoint code_point, std::string &str) { auto write_continue_byte = [&str](std::uint8_t byte6) { str.push_back((1u << 7) + (((1u << 6) - 1) & byte6)); }; @@ -192,7 +195,7 @@ void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str) { } } -void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str) { +void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string &str) { if ((code_point >= 0 && code_point <= 0xD7FF) || (code_point >= 0xE000 && code_point <= 0xFFFF)) { str.push_back(static_cast(code_point)); @@ -224,6 +227,15 @@ std::u16string ToUtf16(std::string_view s) { } #ifdef WIN32 +std::string ToUtf8(std::wstring_view s) { + std::u16string_view string{reinterpret_cast(s.data()), + s.size()}; + std::string result; + for (CodePoint cp : Utf16CodePointIterator{string}) { + Utf8EncodeCodePointAppend(cp, result); + } + return result; +} std::wstring ToUtf16WString(std::string_view s) { std::u16string result; for (CodePoint cp : Utf8CodePointIterator{s}) { @@ -236,33 +248,43 @@ std::wstring ToUtf16WString(std::string_view s) { #endif bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position) { - if (position < 0) return false; - if (position > static_cast(s.size())) return false; - if (position == 0) return true; - if (position == static_cast(s.size())) return true; + if (position < 0) + return false; + if (position > static_cast(s.size())) + return false; + if (position == 0) + return true; + if (position == static_cast(s.size())) + return true; return !IsUtf16SurrogatePairTrailing(s[position]); } gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position, - const std::function& predicate) { - if (position <= 0) return position; + const std::function &predicate) { + if (position <= 0) + return position; while (true) { gsl::index p = position; auto c = Utf16PreviousCodePoint(str, p, &position); - if (predicate(c)) return p; - if (c == k_invalid_code_point) return p; + if (predicate(c)) + return p; + if (c == k_invalid_code_point) + return p; } UnreachableCode(); } gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, - const std::function& predicate) { - if (position >= static_cast(str.size())) return position; + const std::function &predicate) { + if (position >= static_cast(str.size())) + return position; while (true) { gsl::index p = position; auto c = Utf16NextCodePoint(str, p, &position); - if (predicate(c)) return p; - if (c == k_invalid_code_point) return p; + if (predicate(c)) + return p; + if (c == k_invalid_code_point) + return p; } UnreachableCode(); } @@ -270,29 +292,35 @@ gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, inline bool IsSpace(CodePoint c) { return c == 0x20 || c == 0xA; } gsl::index Utf16PreviousWord(std::u16string_view str, gsl::index position, - bool* is_space) { - if (position <= 0) return position; + bool *is_space) { + if (position <= 0) + return position; auto c = Utf16PreviousCodePoint(str, position, nullptr); - if (IsSpace(c)) { // TODO: Currently only test against 0x20(space). - if (is_space) *is_space = true; + if (IsSpace(c)) { // TODO: Currently only test against 0x20(space). + if (is_space) + *is_space = true; return Utf16BackwardUntil(str, position, [](CodePoint c) { return !IsSpace(c); }); } else { - if (is_space) *is_space = false; + if (is_space) + *is_space = false; return Utf16BackwardUntil(str, position, IsSpace); } } gsl::index Utf16NextWord(std::u16string_view str, gsl::index position, - bool* is_space) { - if (position >= static_cast(str.size())) return position; + bool *is_space) { + if (position >= static_cast(str.size())) + return position; auto c = Utf16NextCodePoint(str, position, nullptr); - if (IsSpace(c)) { // TODO: Currently only test against 0x20(space). - if (is_space) *is_space = true; + if (IsSpace(c)) { // TODO: Currently only test against 0x20(space). + if (is_space) + *is_space = true; return Utf16ForwardUntil(str, position, [](CodePoint c) { return !IsSpace(c); }); } else { - if (is_space) *is_space = false; + if (is_space) + *is_space = false; return Utf16ForwardUntil(str, position, IsSpace); } } @@ -313,13 +341,15 @@ char16_t ToUpper(char16_t c) { std::u16string ToLower(std::u16string_view s) { std::u16string result; - for (auto c : s) result.push_back(ToLower(c)); + for (auto c : s) + result.push_back(ToLower(c)); return result; } std::u16string ToUpper(std::u16string_view s) { std::u16string result; - for (auto c : s) result.push_back(ToUpper(c)); + for (auto c : s) + result.push_back(ToUpper(c)); return result; } -} // namespace cru +} // namespace cru diff --git a/works/life/computer-network-experiment/StringUtil.hpp b/works/life/computer-network-experiment/StringUtil.hpp index 1a9634a..b0ca675 100644 --- a/works/life/computer-network-experiment/StringUtil.hpp +++ b/works/life/computer-network-experiment/StringUtil.hpp @@ -130,6 +130,7 @@ std::string ToUtf8(std::u16string_view s); std::u16string ToUtf16(std::string_view s); #ifdef WIN32 +std::string ToUtf8(std::wstring_view s); std::wstring ToUtf16WString(std::string_view s); #endif diff --git a/works/life/computer-network-experiment/client.cpp b/works/life/computer-network-experiment/client.cpp index 489948f..c25a26b 100644 --- a/works/life/computer-network-experiment/client.cpp +++ b/works/life/computer-network-experiment/client.cpp @@ -45,7 +45,7 @@ int Main() { } name.push_back(CRUT('\n')); - String name_data = ConvertCharString(name); + auto name_data = ConvertCharStringBack(name); SafeSend(client_socket, name_data); CloseSocket(client_socket); -- cgit v1.2.3