From 6d8fecb163a9c813a1b533970997353d33b6bf5e Mon Sep 17 00:00:00 2001 From: crupest Date: Sun, 6 Jun 2021 18:37:08 +0800 Subject: import(life): ... --- works/life/computer-network-experiment/Base.hpp | 59 ++++ .../computer-network-experiment/CMakeLists.txt | 7 +- .../life/computer-network-experiment/PreConfig.hpp | 6 + .../computer-network-experiment/StringUtil.cpp | 325 +++++++++++++++++++++ .../computer-network-experiment/StringUtil.hpp | 157 ++++++++++ works/life/computer-network-experiment/server.cpp | 77 ++++- 6 files changed, 615 insertions(+), 16 deletions(-) create mode 100644 works/life/computer-network-experiment/Base.hpp create mode 100644 works/life/computer-network-experiment/PreConfig.hpp create mode 100644 works/life/computer-network-experiment/StringUtil.cpp create mode 100644 works/life/computer-network-experiment/StringUtil.hpp (limited to 'works/life/computer-network-experiment') diff --git a/works/life/computer-network-experiment/Base.hpp b/works/life/computer-network-experiment/Base.hpp new file mode 100644 index 0000000..b1ad55c --- /dev/null +++ b/works/life/computer-network-experiment/Base.hpp @@ -0,0 +1,59 @@ +#pragma once +#include "PreConfig.hpp" + +#include +#include +#include + +#define CRU_UNUSED(entity) static_cast(entity); + +#define CRU__CONCAT(a, b) a##b +#define CRU_MAKE_UNICODE_LITERAL(str) CRU__CONCAT(u, #str) + +#define CRU_DEFAULT_COPY(classname) \ + classname(const classname&) = default; \ + classname& operator=(const classname&) = default; + +#define CRU_DEFAULT_MOVE(classname) \ + classname(classname&&) = default; \ + classname& operator=(classname&&) = default; + +#define CRU_DELETE_COPY(classname) \ + classname(const classname&) = delete; \ + classname& operator=(const classname&) = delete; + +#define CRU_DELETE_MOVE(classname) \ + classname(classname&&) = delete; \ + classname& operator=(classname&&) = delete; + +namespace cru { +class Object { + public: + Object() = default; + CRU_DEFAULT_COPY(Object) + 
CRU_DEFAULT_MOVE(Object) + virtual ~Object() = default; +}; + +struct Interface { + Interface() = default; + CRU_DELETE_COPY(Interface) + CRU_DELETE_MOVE(Interface) + virtual ~Interface() = default; +}; + +[[noreturn]] inline void UnreachableCode() { std::terminate(); } + +using Index = gsl::index; + +// https://www.boost.org/doc/libs/1_54_0/doc/html/hash/reference.html#boost.hash_combine +template +inline void hash_combine(std::size_t& s, const T& v) { + std::hash h; + s ^= h(v) + 0x9e3779b9 + (s << 6) + (s >> 2); +} + +#define CRU_DEFINE_CLASS_LOG_TAG(tag) \ + private: \ + constexpr static std::u16string_view log_tag = tag; +} // namespace cru diff --git a/works/life/computer-network-experiment/CMakeLists.txt b/works/life/computer-network-experiment/CMakeLists.txt index 41fec71..923de1f 100644 --- a/works/life/computer-network-experiment/CMakeLists.txt +++ b/works/life/computer-network-experiment/CMakeLists.txt @@ -7,8 +7,13 @@ project(network-experiment) set(CMAKE_CXX_STANDARD 17) +find_package(fmt CONFIG REQUIRED) +find_package(Microsoft.GSL CONFIG REQUIRED) +add_library(base STATIC StringUtil.cpp) +target_link_libraries(base PUBLIC Microsoft.GSL::GSL fmt::fmt) + add_executable(client client.cpp) add_executable(server server.cpp) find_package(folly CONFIG REQUIRED) -target_link_libraries(server PRIVATE Folly::folly) +target_link_libraries(server PRIVATE base Folly::folly) diff --git a/works/life/computer-network-experiment/PreConfig.hpp b/works/life/computer-network-experiment/PreConfig.hpp new file mode 100644 index 0000000..d78292c --- /dev/null +++ b/works/life/computer-network-experiment/PreConfig.hpp @@ -0,0 +1,6 @@ +#pragma once + +#ifdef _MSC_VER +// disable the unnecessary warning about multi-inheritance +#pragma warning(disable : 4250) +#endif diff --git a/works/life/computer-network-experiment/StringUtil.cpp b/works/life/computer-network-experiment/StringUtil.cpp new file mode 100644 index 0000000..1224bdc --- /dev/null +++ 
b/works/life/computer-network-experiment/StringUtil.cpp @@ -0,0 +1,325 @@ +#include "StringUtil.hpp" +#include "Base.hpp" +#include + +namespace cru { +namespace { +template +inline std::enable_if_t, ReturnType> ExtractBits( + UInt n) { + return static_cast(n & ((1u << number_of_bit) - 1)); +} +} // namespace + +CodePoint Utf8NextCodePoint(std::string_view str, Index current, + Index* next_position) { + CodePoint result; + + if (current >= static_cast(str.length())) { + result = k_invalid_code_point; + } else { + const auto cu0 = static_cast(str[current++]); + + auto read_next_folowing_code = [&str, &current]() -> CodePoint { + if (current == static_cast(str.length())) + throw TextEncodeException( + "Unexpected end when read continuing byte of multi-byte code " + "point."); + + const auto u = static_cast(str[current]); + if (!(u & (1u << 7)) || (u & (1u << 6))) { + throw TextEncodeException( + "Unexpected bad-format (not 0b10xxxxxx) continuing byte of " + "multi-byte code point."); + } + + return ExtractBits(str[current++]); + }; + + if ((1u << 7) & cu0) { + if ((1u << 6) & cu0) { // 2~4-length code point + if ((1u << 5) & cu0) { // 3~4-length code point + if ((1u << 4) & cu0) { // 4-length code point + if (cu0 & (1u << 3)) { + throw TextEncodeException( + "Unexpected bad-format begin byte (not 0b11110xxx) of 4-byte" + "code point."); + } + + const CodePoint s0 = ExtractBits(cu0) + << (6 * 3); + const CodePoint s1 = read_next_folowing_code() << (6 * 2); + const CodePoint s2 = read_next_folowing_code() << 6; + const CodePoint s3 = read_next_folowing_code(); + result = s0 + s1 + s2 + s3; + } else { // 3-length code point + const CodePoint s0 = ExtractBits(cu0) + << (6 * 2); + const CodePoint s1 = read_next_folowing_code() << 6; + const CodePoint s2 = read_next_folowing_code(); + result = s0 + s1 + s2; + } + } else { // 2-length code point + const CodePoint s0 = ExtractBits(cu0) + << 6; + const CodePoint s1 = read_next_folowing_code(); + result = s0 + s1; + } + } else { + 
throw TextEncodeException( + "Unexpected bad-format (0b10xxxxxx) begin byte of a code point."); + } + } else { + result = static_cast(cu0); + } + } + + if (next_position != nullptr) *next_position = current; + return result; +} + +CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, + Index* next_position) { + CodePoint result; + + if (current >= static_cast(str.length())) { + result = k_invalid_code_point; + } else { + const auto cu0 = str[current++]; + + if (!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point + result = static_cast(cu0); + } else if (IsUtf16SurrogatePairLeading(cu0)) { // 2-length code point + if (current >= static_cast(str.length())) { + throw TextEncodeException( + "Unexpected end when reading second code unit of surrogate pair."); + } + const auto cu1 = str[current++]; + + if (!IsUtf16SurrogatePairTrailing(cu1)) { + throw TextEncodeException( + "Unexpected bad-range second code unit of surrogate pair."); + } + + const auto s0 = ExtractBits(cu0) << 10; + const auto s1 = ExtractBits(cu1); + + result = s0 + s1 + 0x10000; + + } else { + throw TextEncodeException( + "Unexpected bad-range first code unit of surrogate pair."); + } + } + + if (next_position != nullptr) *next_position = current; + return result; +} + +CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, + Index* previous_position) { + CodePoint result; + if (current <= 0) { + result = k_invalid_code_point; + } else { + const auto cu0 = str[--current]; + + if (!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point + result = static_cast(cu0); + } else if (IsUtf16SurrogatePairTrailing(cu0)) { // 2-length code point + if (current <= 0) { + throw TextEncodeException( + "Unexpected end when reading first code unit of surrogate pair."); + } + const auto cu1 = str[--current]; + + if (!IsUtf16SurrogatePairLeading(cu1)) { + throw TextEncodeException( + "Unexpected bad-range first code unit of surrogate pair."); + } + + const auto s0 = 
ExtractBits(cu1) << 10; + const auto s1 = ExtractBits(cu0); + + result = s0 + s1 + 0x10000; + + } else { + throw TextEncodeException( + "Unexpected bad-range second code unit of surrogate pair."); + } + } + + if (previous_position != nullptr) *previous_position = current; + return result; +} + +void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str) { + auto write_continue_byte = [&str](std::uint8_t byte6) { + str.push_back((1u << 7) + (((1u << 6) - 1) & byte6)); + }; + + if (code_point >= 0 && code_point <= 0x007F) { + str.push_back(static_cast(code_point)); + } else if (code_point >= 0x0080 && code_point <= 0x07FF) { + std::uint32_t unsigned_code_point = code_point; + str.push_back(static_cast(ExtractBits( + (unsigned_code_point >> 6)) + + 0b11000000)); + write_continue_byte( + ExtractBits(unsigned_code_point)); + } else if (code_point >= 0x0800 && code_point <= 0xFFFF) { + std::uint32_t unsigned_code_point = code_point; + str.push_back(static_cast(ExtractBits( + (unsigned_code_point >> (6 * 2))) + + 0b11100000)); + write_continue_byte( + ExtractBits(unsigned_code_point >> 6)); + write_continue_byte( + ExtractBits(unsigned_code_point)); + } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { + std::uint32_t unsigned_code_point = code_point; + str.push_back(static_cast(ExtractBits( + (unsigned_code_point >> (6 * 3))) + + 0b11110000)); + write_continue_byte(ExtractBits( + unsigned_code_point >> (6 * 2))); + write_continue_byte( + ExtractBits(unsigned_code_point >> 6)); + write_continue_byte( + ExtractBits(unsigned_code_point)); + } else { + throw TextEncodeException("Code point out of range."); + } +} + +void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str) { + if ((code_point >= 0 && code_point <= 0xD7FF) || + (code_point >= 0xE000 && code_point <= 0xFFFF)) { + str.push_back(static_cast(code_point)); + } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { + std::uint32_t u = code_point - 0x10000; + 
str.push_back(static_cast( + ExtractBits(u >> 10) + 0xD800u)); + str.push_back(static_cast( + ExtractBits(u) + 0xDC00u)); + } else { + throw TextEncodeException("Code point out of range."); + } +} + +std::string ToUtf8(std::u16string_view s) { + std::string result; + for (CodePoint cp : Utf16CodePointIterator{s}) { + Utf8EncodeCodePointAppend(cp, result); + } + return result; +} + +std::u16string ToUtf16(std::string_view s) { + std::u16string result; + for (CodePoint cp : Utf8CodePointIterator{s}) { + Utf16EncodeCodePointAppend(cp, result); + } + return result; +} + +#ifdef WIN32 +std::wstring ToUtf16WString(std::string_view s) { + std::u16string result; + for (CodePoint cp : Utf8CodePointIterator{s}) { + Utf16EncodeCodePointAppend(cp, result); + } + + std::wstring r(result.cbegin(), result.cend()); + return r; +} +#endif + +bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position) { + if (position < 0) return false; + if (position > static_cast(s.size())) return false; + if (position == 0) return true; + if (position == static_cast(s.size())) return true; + return !IsUtf16SurrogatePairTrailing(s[position]); +} + +gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate) { + if (position <= 0) return position; + while (true) { + gsl::index p = position; + auto c = Utf16PreviousCodePoint(str, p, &position); + if (predicate(c)) return p; + if (c == k_invalid_code_point) return p; + } + UnreachableCode(); +} + +gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate) { + if (position >= static_cast(str.size())) return position; + while (true) { + gsl::index p = position; + auto c = Utf16NextCodePoint(str, p, &position); + if (predicate(c)) return p; + if (c == k_invalid_code_point) return p; + } + UnreachableCode(); +} + +inline bool IsSpace(CodePoint c) { return c == 0x20 || c == 0xA; } + +gsl::index Utf16PreviousWord(std::u16string_view str, 
gsl::index position, + bool* is_space) { + if (position <= 0) return position; + auto c = Utf16PreviousCodePoint(str, position, nullptr); + if (IsSpace(c)) { // TODO: Currently only test against 0x20 (space) and 0xA (line feed). + if (is_space) *is_space = true; + return Utf16BackwardUntil(str, position, + [](CodePoint c) { return !IsSpace(c); }); + } else { + if (is_space) *is_space = false; + return Utf16BackwardUntil(str, position, IsSpace); + } +} + +gsl::index Utf16NextWord(std::u16string_view str, gsl::index position, + bool* is_space) { + if (position >= static_cast(str.size())) return position; + auto c = Utf16NextCodePoint(str, position, nullptr); + if (IsSpace(c)) { // TODO: Currently only test against 0x20 (space) and 0xA (line feed). + if (is_space) *is_space = true; + return Utf16ForwardUntil(str, position, + [](CodePoint c) { return !IsSpace(c); }); + } else { + if (is_space) *is_space = false; + return Utf16ForwardUntil(str, position, IsSpace); + } +} + +char16_t ToLower(char16_t c) { + if (c >= u'A' && c <= u'Z') { + return c - u'A' + u'a'; + } + return c; +} + +char16_t ToUpper(char16_t c) { + if (c >= u'a' && c <= u'z') { + return c - u'a' + u'A'; + } + return c; +} + +std::u16string ToLower(std::u16string_view s) { + std::u16string result; + for (auto c : s) result.push_back(ToLower(c)); + return result; +} + +std::u16string ToUpper(std::u16string_view s) { + std::u16string result; + for (auto c : s) result.push_back(ToUpper(c)); + return result; +} +} // namespace cru diff --git a/works/life/computer-network-experiment/StringUtil.hpp b/works/life/computer-network-experiment/StringUtil.hpp new file mode 100644 index 0000000..1a9634a --- /dev/null +++ b/works/life/computer-network-experiment/StringUtil.hpp @@ -0,0 +1,157 @@ +#pragma once +#include "Base.hpp" + +#include +#include +#include + +namespace cru { +using CodePoint = std::int32_t; +constexpr CodePoint k_invalid_code_point = -1; + +class TextEncodeException : public std::runtime_error { + public: + using 
runtime_error::runtime_error; +}; + +inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) { + return c >= 0xD800 && c <= 0xDFFF; +} + +inline bool IsUtf16SurrogatePairLeading(char16_t c) { + return c >= 0xD800 && c <= 0xDBFF; +} + +inline bool IsUtf16SurrogatePairTrailing(char16_t c) { + return c >= 0xDC00 && c <= 0xDFFF; +} + +CodePoint Utf8NextCodePoint(std::string_view str, Index current, + Index* next_position); + +CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, + Index* next_position); +CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, + Index* previous_position); + +template +using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*); + +template NextCodePointFunction> +class CodePointIterator { + public: + using difference_type = Index; + using value_type = CodePoint; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; + + public: + struct past_end_tag_t {}; + + explicit CodePointIterator(StringType string) + : string_(std::move(string)), position_(0) {} + explicit CodePointIterator(StringType string, past_end_tag_t) + : string_(std::move(string)), position_(string_.size()) {} + + CRU_DEFAULT_COPY(CodePointIterator) + CRU_DEFAULT_MOVE(CodePointIterator) + + ~CodePointIterator() = default; + + public: + StringType GetString() const { return string_; } + Index GetPosition() const { return position_; } + + bool IsPastEnd() const { + return position_ == static_cast(string_.size()); + } + + public: + CodePointIterator begin() const { return *this; } + CodePointIterator end() const { + return CodePointIterator{string_, past_end_tag_t{}}; + } + + public: + bool operator==(const CodePointIterator& other) const { + // You should compare iterator that iterate on the same string. 
+ Expects(this->string_.data() == other.string_.data() && + this->string_.size() == other.string_.size()); + return this->position_ == other.position_; + } + bool operator!=(const CodePointIterator& other) const { + return !this->operator==(other); + } + + CodePointIterator& operator++() { + Expects(!IsPastEnd()); + Forward(); + return *this; + } + + CodePointIterator operator++(int) { + Expects(!IsPastEnd()); + CodePointIterator old = *this; + Forward(); + return old; + } + + CodePoint operator*() const { + return NextCodePointFunction(string_, position_, &next_position_cache_); + } + + private: + void Forward() { + if (next_position_cache_ > position_) { + position_ = next_position_cache_; + } else { + NextCodePointFunction(string_, position_, &position_); + } + } + + private: + StringType string_; + Index position_; + mutable Index next_position_cache_; +}; + +using Utf8CodePointIterator = + CodePointIterator; + +using Utf16CodePointIterator = + CodePointIterator; + +void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str); +void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str); + +std::string ToUtf8(std::u16string_view s); +std::u16string ToUtf16(std::string_view s); + +#ifdef WIN32 +std::wstring ToUtf16WString(std::string_view s); +#endif + +// If given s is not a valid utf16 string, return value is UD. +bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position); + +// Return position after the character making predicate returns true or 0 if no +// character doing so. +gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate); +// Return position before the character making predicate returns true or +// str.size() if no character doing so. 
+gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, + const std::function& predicate); + +gsl::index Utf16PreviousWord(std::u16string_view str, gsl::index position, + bool* is_space = nullptr); +gsl::index Utf16NextWord(std::u16string_view str, gsl::index position, + bool* is_space = nullptr); + +char16_t ToLower(char16_t c); +char16_t ToUpper(char16_t c); +std::u16string ToLower(std::u16string_view s); +std::u16string ToUpper(std::u16string_view s); +} // namespace cru diff --git a/works/life/computer-network-experiment/server.cpp b/works/life/computer-network-experiment/server.cpp index 15470f4..de5bcd3 100644 --- a/works/life/computer-network-experiment/server.cpp +++ b/works/life/computer-network-experiment/server.cpp @@ -3,30 +3,83 @@ */ #include -#include #include -#include #include #include #include #include +#include +#include +#include + #include #include +#include "StringUtil.hpp" +#include "fmt/core.h" + #pragma comment(lib, "Ws2_32.lib") const auto bind_address = "127.0.0.1"; // control bind address const u_short port = 1234; // control bind port -// As far as I know, cout is not thread safe. So we need a lock. But this might -// not be the best solution. We can use a queue instead to avoid block. 
-std::mutex cout_mutex; +enum class OutputType { Normal, Error }; + +struct Output { + Output() = default; + Output(std::wstring message, OutputType type = OutputType::Normal) + : message(std::move(message)), type(type) {} + + CRU_DEFAULT_COPY(Output) + CRU_DEFAULT_MOVE(Output) + ~Output() = default; + + std::wstring message; + OutputType type; +}; + +folly::MPMCQueue output_queue; + +void SendOutput(std::wstring output) { + output_queue.blockingWrite(std::move(output)); +} + +void SendOutput(Output output) { + output_queue.blockingWrite(std::move(output)); +} + +template +void SendOutput(std::wstring_view format, Args &&...args) { + output_queue.blockingWrite(fmt::format(format, std::forward(args)...)); +} + +template +void SendOutput(OutputType type, std::wstring_view format, Args &&...args) { + output_queue.blockingWrite( + {fmt::format(format, std::forward(args)...), type}); +} + +void OutputThread() { + while (true) { + Output output; + output_queue.blockingRead(output); + switch (output.type) { + case OutputType::Error: + std::wcerr << output.message; + break; + default: + std::wcout << output.message; + break; + } + } +} [[noreturn]] void PrintErrorMessageAndExit(std::wstring_view message, std::optional error_code = std::nullopt) { - std::wcerr << message << L'\n'; + + SendOutput(L"{}\n", message); if (error_code) { std::cerr << L"Error code is " << std::hex << *error_code << L'\n'; @@ -48,10 +101,7 @@ PrintErrorMessageAndExit(std::wstring_view message, void ResponseThreadProc(int socket, sockaddr_in address) { auto address_string = inet_ntoa(address.sin_addr); - { - std::lock_guard guard(cout_mutex); - std::cout << "Connected to " << address_string << "!\n"; - } + SendOutput(L"Connected to {}!\n", cru::ToUtf16WString(address_string)); const std::string_view buffer = "Love you!!! 
By crupest!"; @@ -69,7 +119,6 @@ void ResponseThreadProc(int socket, sockaddr_in address) { // send failed if (byte_actually_sent == SOCKET_ERROR) { - std::lock_guard guard(cout_mutex); std::cerr << "Failed to send!\n"; closesocket(socket); break; @@ -78,10 +127,8 @@ void ResponseThreadProc(int socket, sockaddr_in address) { byte_count_sent += byte_actually_sent; } - { - std::lock_guard guard(cout_mutex); - std::cout << "Succeeded to send message to " << address_string << "!\n"; - } + SendOutput(L"Succeeded to send message to {} !\n", + cru::ToUtf16WString(address_string)); closesocket(socket); } -- cgit v1.2.3