From 168c9a009eb68697c1b8328cfbf3a1a6ccae48f6 Mon Sep 17 00:00:00 2001 From: crupest Date: Tue, 29 Dec 2020 22:52:02 +0800 Subject: import(life): ... --- works/life/cpp-practicum/.gitignore | 595 ++++++++++++++++++++++++++++++++ works/life/cpp-practicum/Base.hpp | 63 ++++ works/life/cpp-practicum/CMakeLists.txt | 14 + works/life/cpp-practicum/StringUtil.cpp | 305 ++++++++++++++++ works/life/cpp-practicum/StringUtil.hpp | 148 ++++++++ works/life/cpp-practicum/main.cpp | 281 +++++++++++++++ 6 files changed, 1406 insertions(+) create mode 100644 works/life/cpp-practicum/.gitignore create mode 100644 works/life/cpp-practicum/Base.hpp create mode 100644 works/life/cpp-practicum/CMakeLists.txt create mode 100644 works/life/cpp-practicum/StringUtil.cpp create mode 100644 works/life/cpp-practicum/StringUtil.hpp create mode 100644 works/life/cpp-practicum/main.cpp (limited to 'works/life/cpp-practicum') diff --git a/works/life/cpp-practicum/.gitignore b/works/life/cpp-practicum/.gitignore new file mode 100644 index 0000000..9ad9ade --- /dev/null +++ b/works/life/cpp-practicum/.gitignore @@ -0,0 +1,595 @@ + +# Created by https://www.gitignore.io/api/c++,cmake,python,visualstudio,visualstudiocode +# Edit at https://www.gitignore.io/?templates=c++,cmake,python,visualstudio,visualstudiocode + +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### CMake ### +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps + +### CMake Patch ### +# External projects +*-prefix/ + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions + +# Distribution / 
packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don’t work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +### VisualStudioCode ### +.vscode/* +!.vscode/c_cpp_properties.json +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.iobj +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog 
+*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- Backup*.rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# End of https://www.gitignore.io/api/c++,cmake,python,visualstudio,visualstudiocode + +# Created by https://www.gitignore.io/api/emacs +# Edit at https://www.gitignore.io/?templates=emacs + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +# End of https://www.gitignore.io/api/emacs + +compile_flags.txt +.clangd +.kdev4 
+CruUI.kdev4 diff --git a/works/life/cpp-practicum/Base.hpp b/works/life/cpp-practicum/Base.hpp new file mode 100644 index 0000000..f26b81f --- /dev/null +++ b/works/life/cpp-practicum/Base.hpp @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +// #include +#include + +// Now we have no gsl but we need gsl::index +namespace gsl { +using index = std::ptrdiff_t; +} + +#define CRU_UNUSED(entity) static_cast(entity); + +#define CRU__CONCAT(a, b) a##b +#define CRU_MAKE_UNICODE_LITERAL(str) CRU__CONCAT(u, #str) + +#define CRU_DEFAULT_COPY(classname) \ + classname(const classname &) = default; \ + classname &operator=(const classname &) = default; + +#define CRU_DEFAULT_MOVE(classname) \ + classname(classname &&) = default; \ + classname &operator=(classname &&) = default; + +#define CRU_DELETE_COPY(classname) \ + classname(const classname &) = delete; \ + classname &operator=(const classname &) = delete; + +#define CRU_DELETE_MOVE(classname) \ + classname(classname &&) = delete; \ + classname &operator=(classname &&) = delete; + +namespace cru { +class Object { +public: + Object() = default; + CRU_DEFAULT_COPY(Object) + CRU_DEFAULT_MOVE(Object) + virtual ~Object() = default; +}; + +struct Interface { + Interface() = default; + CRU_DELETE_COPY(Interface) + CRU_DELETE_MOVE(Interface) + virtual ~Interface() = default; +}; + +[[noreturn]] inline void UnreachableCode() { std::terminate(); } + +using Index = gsl::index; + +// https://www.boost.org/doc/libs/1_54_0/doc/html/hash/reference.html#boost.hash_combine +template inline void hash_combine(std::size_t &s, const T &v) { + std::hash h; + s ^= h(v) + 0x9e3779b9 + (s << 6) + (s >> 2); +} + +#define CRU_DEFINE_CLASS_LOG_TAG(tag) \ +private: \ + constexpr static std::u16string_view log_tag = tag; +} // namespace cru diff --git a/works/life/cpp-practicum/CMakeLists.txt b/works/life/cpp-practicum/CMakeLists.txt new file mode 100644 index 0000000..fbf1c13 --- /dev/null +++ b/works/life/cpp-practicum/CMakeLists.txt @@ -0,0 +1,14 
@@ +cmake_minimum_required(VERSION 3.14) + +project(CppPraticum) + +enable_testing() + +set(CMAKE_CXX_STANDARD 17) + +if (MSVC) + string(REGEX REPLACE "/W[0-4]\\s*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + add_compile_options(/utf-8 /W4 /WX) +endif() + +add_executable(main main.cpp Base.hpp StringUtil.hpp StringUtil.cpp) diff --git a/works/life/cpp-practicum/StringUtil.cpp b/works/life/cpp-practicum/StringUtil.cpp new file mode 100644 index 0000000..e6cd377 --- /dev/null +++ b/works/life/cpp-practicum/StringUtil.cpp @@ -0,0 +1,305 @@ +#include "StringUtil.hpp" +#include "Base.hpp" + +namespace cru { +namespace { +template +inline std::enable_if_t, ReturnType> +ExtractBits(UInt n) { + return static_cast(n & ((1u << number_of_bit) - 1)); +} +} // namespace + +CodePoint Utf8NextCodePoint(std::string_view str, Index current, + Index *next_position) { + CodePoint result; + + if (current >= static_cast(str.length())) { + result = k_invalid_code_point; + } else { + const auto cu0 = static_cast(str[current++]); + + auto read_next_folowing_code = [&str, &current]() -> CodePoint { + if (current == static_cast(str.length())) + throw TextEncodeException( + "Unexpected end when read continuing byte of multi-byte code " + "point."); + + const auto u = static_cast(str[current]); + if (!(u & (1u << 7)) || (u & (1u << 6))) { + throw TextEncodeException( + "Unexpected bad-format (not 0b10xxxxxx) continuing byte of " + "multi-byte code point."); + } + + return ExtractBits(str[current++]); + }; + + if ((1u << 7) & cu0) { + if ((1u << 6) & cu0) { // 2~4-length code point + if ((1u << 5) & cu0) { // 3~4-length code point + if ((1u << 4) & cu0) { // 4-length code point + if (cu0 & (1u << 3)) { + throw TextEncodeException( + "Unexpected bad-format begin byte (not 0b11110xxx) of 4-byte" + "code point."); + } + + const CodePoint s0 = ExtractBits(cu0) + << (6 * 3); + const CodePoint s1 = read_next_folowing_code() << (6 * 2); + const CodePoint s2 = read_next_folowing_code() << 6; + const CodePoint 
s3 = read_next_folowing_code(); + result = s0 + s1 + s2 + s3; + } else { // 3-length code point + const CodePoint s0 = ExtractBits(cu0) + << (6 * 2); + const CodePoint s1 = read_next_folowing_code() << 6; + const CodePoint s2 = read_next_folowing_code(); + result = s0 + s1 + s2; + } + } else { // 2-length code point + const CodePoint s0 = ExtractBits(cu0) + << 6; + const CodePoint s1 = read_next_folowing_code(); + result = s0 + s1; + } + } else { + throw TextEncodeException( + "Unexpected bad-format (0b10xxxxxx) begin byte of a code point."); + } + } else { + result = static_cast(cu0); + } + } + + if (next_position != nullptr) + *next_position = current; + return result; +} + +CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, + Index *next_position) { + CodePoint result; + + if (current >= static_cast(str.length())) { + result = k_invalid_code_point; + } else { + const auto cu0 = str[current++]; + + if (!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point + result = static_cast(cu0); + } else if (IsUtf16SurrogatePairLeading(cu0)) { // 2-length code point + if (current >= static_cast(str.length())) { + throw TextEncodeException( + "Unexpected end when reading second code unit of surrogate pair."); + } + const auto cu1 = str[current++]; + + if (!IsUtf16SurrogatePairTrailing(cu1)) { + throw TextEncodeException( + "Unexpected bad-range second code unit of surrogate pair."); + } + + const auto s0 = ExtractBits(cu0) << 10; + const auto s1 = ExtractBits(cu1); + + result = s0 + s1 + 0x10000; + + } else { + throw TextEncodeException( + "Unexpected bad-range first code unit of surrogate pair."); + } + } + + if (next_position != nullptr) + *next_position = current; + return result; +} + +CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, + Index *previous_position) { + CodePoint result; + if (current <= 0) { + result = k_invalid_code_point; + } else { + const auto cu0 = str[--current]; + + if 
(!IsUtf16SurrogatePairCodeUnit(cu0)) { // 1-length code point + result = static_cast(cu0); + } else if (IsUtf16SurrogatePairTrailing(cu0)) { // 2-length code point + if (current <= 0) { + throw TextEncodeException( + "Unexpected end when reading first code unit of surrogate pair."); + } + const auto cu1 = str[--current]; + + if (!IsUtf16SurrogatePairLeading(cu1)) { + throw TextEncodeException( + "Unexpected bad-range first code unit of surrogate pair."); + } + + const auto s0 = ExtractBits(cu1) << 10; + const auto s1 = ExtractBits(cu0); + + result = s0 + s1 + 0x10000; + + } else { + throw TextEncodeException( + "Unexpected bad-range second code unit of surrogate pair."); + } + } + + if (previous_position != nullptr) + *previous_position = current; + return result; +} + +void Utf8EncodeCodePointAppend(CodePoint code_point, std::string &str) { + auto write_continue_byte = [&str](std::uint8_t byte6) { + str.push_back((1u << 7) + (((1u << 6) - 1) & byte6)); + }; + + if (code_point >= 0 && code_point <= 0x007F) { + str.push_back(static_cast(code_point)); + } else if (code_point >= 0x0080 && code_point <= 0x07FF) { + std::uint32_t unsigned_code_point = code_point; + str.push_back(static_cast(ExtractBits( + (unsigned_code_point >> 6)) + + 0b11000000)); + write_continue_byte( + ExtractBits(unsigned_code_point)); + } else if (code_point >= 0x0800 && code_point <= 0xFFFF) { + std::uint32_t unsigned_code_point = code_point; + str.push_back(static_cast(ExtractBits( + (unsigned_code_point >> (6 * 2))) + + 0b11100000)); + write_continue_byte( + ExtractBits(unsigned_code_point >> 6)); + write_continue_byte( + ExtractBits(unsigned_code_point)); + } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { + std::uint32_t unsigned_code_point = code_point; + str.push_back(static_cast(ExtractBits( + (unsigned_code_point >> (6 * 3))) + + 0b11110000)); + write_continue_byte(ExtractBits( + unsigned_code_point >> (6 * 2))); + write_continue_byte( + ExtractBits(unsigned_code_point >> 
6)); + write_continue_byte( + ExtractBits(unsigned_code_point)); + } else { + throw TextEncodeException("Code point out of range."); + } +} + +void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string &str) { + if ((code_point >= 0 && code_point <= 0xD7FF) || + (code_point >= 0xE000 && code_point <= 0xFFFF)) { + str.push_back(static_cast(code_point)); + } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) { + std::uint32_t u = code_point - 0x10000; + str.push_back(static_cast( + ExtractBits(u >> 10) + 0xD800u)); + str.push_back(static_cast( + ExtractBits(u) + 0xDC00u)); + } else { + throw TextEncodeException("Code point out of range."); + } +} + +std::string ToUtf8(std::u16string_view s) { + std::string result; + for (CodePoint cp : Utf16CodePointIterator{s}) { + Utf8EncodeCodePointAppend(cp, result); + } + return result; +} + +std::u16string ToUtf16(std::string_view s) { + std::u16string result; + for (CodePoint cp : Utf8CodePointIterator{s}) { + Utf16EncodeCodePointAppend(cp, result); + } + return result; +} + +bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position) { + if (position < 0) + return false; + if (position > static_cast(s.size())) + return false; + if (position == 0) + return true; + if (position == static_cast(s.size())) + return true; + return !IsUtf16SurrogatePairTrailing(s[position]); +} + +gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position, + const std::function &predicate) { + if (position <= 0) + return position; + while (true) { + gsl::index p = position; + auto c = Utf16PreviousCodePoint(str, p, &position); + if (predicate(c)) + return p; + if (c == k_invalid_code_point) + return p; + } + UnreachableCode(); +} + +gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, + const std::function &predicate) { + if (position >= static_cast(str.size())) + return position; + while (true) { + gsl::index p = position; + auto c = Utf16NextCodePoint(str, p, &position); + if 
(predicate(c)) + return p; + if (c == k_invalid_code_point) + return p; + } + UnreachableCode(); +} + +inline bool IsSpace(CodePoint c) { return c == 0x20 || c == 0xA; } + +gsl::index Utf16PreviousWord(std::u16string_view str, gsl::index position, + bool *is_space) { + if (position <= 0) + return position; + auto c = Utf16PreviousCodePoint(str, position, nullptr); + if (IsSpace(c)) { // TODO: Currently only test against 0x20(space). + if (is_space) + *is_space = true; + return Utf16BackwardUntil(str, position, + [](CodePoint c) { return !IsSpace(c); }); + } else { + if (is_space) + *is_space = false; + return Utf16BackwardUntil(str, position, IsSpace); + } +} + +gsl::index Utf16NextWord(std::u16string_view str, gsl::index position, + bool *is_space) { + if (position >= static_cast(str.size())) + return position; + auto c = Utf16NextCodePoint(str, position, nullptr); + if (IsSpace(c)) { // TODO: Currently only test against 0x20(space). + if (is_space) + *is_space = true; + return Utf16ForwardUntil(str, position, + [](CodePoint c) { return !IsSpace(c); }); + } else { + if (is_space) + *is_space = false; + return Utf16ForwardUntil(str, position, IsSpace); + } +} +} // namespace cru diff --git a/works/life/cpp-practicum/StringUtil.hpp b/works/life/cpp-practicum/StringUtil.hpp new file mode 100644 index 0000000..d7b6cc9 --- /dev/null +++ b/works/life/cpp-practicum/StringUtil.hpp @@ -0,0 +1,148 @@ +#pragma once +#include "Base.hpp" + +#include +#include +#include + +namespace cru { +using CodePoint = std::int32_t; +constexpr CodePoint k_invalid_code_point = -1; + +class TextEncodeException : public std::runtime_error { +public: + using runtime_error::runtime_error; +}; + +inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) { + return c >= 0xD800 && c <= 0xDFFF; +} + +inline bool IsUtf16SurrogatePairLeading(char16_t c) { + return c >= 0xD800 && c <= 0xDBFF; +} + +inline bool IsUtf16SurrogatePairTrailing(char16_t c) { + return c >= 0xDC00 && c <= 0xDFFF; +} + +CodePoint 
Utf8NextCodePoint(std::string_view str, Index current, + Index *next_position); + +CodePoint Utf16NextCodePoint(std::u16string_view str, Index current, + Index *next_position); +CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current, + Index *previous_position); + +template +using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index *); + +template NextCodePointFunction> +class CodePointIterator { +public: + using difference_type = Index; + using value_type = CodePoint; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; + +public: + struct past_end_tag_t {}; + + explicit CodePointIterator(StringType string) + : string_(std::move(string)), position_(0) {} + explicit CodePointIterator(StringType string, past_end_tag_t) + : string_(std::move(string)), position_(string_.size()) {} + + CRU_DEFAULT_COPY(CodePointIterator) + CRU_DEFAULT_MOVE(CodePointIterator) + + ~CodePointIterator() = default; + +public: + StringType GetString() const { return string_; } + Index GetPosition() const { return position_; } + + bool IsPastEnd() const { + return position_ == static_cast(string_.size()); + } + +public: + CodePointIterator begin() const { return *this; } + CodePointIterator end() const { + return CodePointIterator{string_, past_end_tag_t{}}; + } + +public: + bool operator==(const CodePointIterator &other) const { + // You should compare iterator that iterate on the same string. 
+ Expects(this->string_.data() == other.string_.data() && + this->string_.size() == other.string_.size()); + return this->position_ == other.position_; + } + bool operator!=(const CodePointIterator &other) const { + return !this->operator==(other); + } + + CodePointIterator &operator++() { + Expects(!IsPastEnd()); + Forward(); + return *this; + } + + CodePointIterator operator++(int) { + Expects(!IsPastEnd()); + CodePointIterator old = *this; + Forward(); + return old; + } + + CodePoint operator*() const { + return NextCodePointFunction(string_, position_, &next_position_cache_); + } + +private: + void Forward() { + if (next_position_cache_ > position_) { + position_ = next_position_cache_; + } else { + NextCodePointFunction(string_, position_, &position_); + } + } + +private: + StringType string_; + Index position_; + mutable Index next_position_cache_; +}; + +using Utf8CodePointIterator = + CodePointIterator; + +using Utf16CodePointIterator = + CodePointIterator; + +void Utf8EncodeCodePointAppend(CodePoint code_point, std::string &str); +void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string &str); + +std::string ToUtf8(std::u16string_view s); +std::u16string ToUtf16(std::string_view s); + +// If given s is not a valid utf16 string, return value is UD. +bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position); + +// Return position after the character making predicate returns true or 0 if no +// character doing so. +gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position, + const std::function &predicate); +// Return position before the character making predicate returns true or +// str.size() if no character doing so. 
+gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position, + const std::function &predicate); + +gsl::index Utf16PreviousWord(std::u16string_view str, gsl::index position, + bool *is_space = nullptr); +gsl::index Utf16NextWord(std::u16string_view str, gsl::index position, + bool *is_space = nullptr); +} // namespace cru diff --git a/works/life/cpp-practicum/main.cpp b/works/life/cpp-practicum/main.cpp new file mode 100644 index 0000000..0f90b74 --- /dev/null +++ b/works/life/cpp-practicum/main.cpp @@ -0,0 +1,281 @@ +#include "Base.hpp" +#include "StringUtil.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +inline std::u16string_view ToUtf16View(const std::wstring &str) { + return std::u16string_view(reinterpret_cast(str.c_str()), + str.size()); +} + +inline std::wstring_view ToWStringView(std::u16string_view str) { + return std::wstring_view(reinterpret_cast(str.data()), + str.size()); +} + +class SerializationException : public std::runtime_error { +public: + using runtime_error::runtime_error; +}; + +std::vector SplitByVerticalLine(std::string_view str, + int field_count) { + std::vector fields; + gsl::index current_position = 0; + + for (int i = 0; i < field_count - 1; i++) { + auto pos = str.find('|', current_position); + if (pos == std::string::npos) { + throw SerializationException("Failed to find next splitter('|')."); + } + fields.push_back(str.substr(current_position, pos - current_position)); + current_position = pos + 1; + } + fields.push_back(str.substr(current_position)); + + return fields; +} + +class Book final { +public: + Book() = default; + + Book(std::u16string isbn, std::u16string title, std::u16string type, + std::u16string author, std::u16string press, int stock_count) + : isbn_(std::move(isbn)), title_(std::move(title)), + type_(std::move(type)), author_(std::move(author)), + press_(std::move(press)), stock_count_(stock_count) {} + + CRU_DEFAULT_COPY(Book) + CRU_DEFAULT_MOVE(Book) + + 
~Book() = default; + +public: + std::u16string GetIsbn() const { return isbn_; } + void SetIsbn(std::u16string isbn) { isbn_ = std::move(isbn); } + + std::u16string GetTitle() const { return title_; } + void SetTitle(std::u16string title) { title_ = std::move(title); } + + std::u16string GetType() const { return type_; } + void SetType(std::u16string type) { type_ = std::move(type); } + + std::u16string GetAuthor() const { return author_; } + void SetAuthor(std::u16string author) { author_ = std::move(author); } + + std::u16string GetPress() const { return press_; } + void SetPress(std::u16string press) { press_ = std::move(press); } + + int GetStockCount() const { return stock_count_; } + void SetStockCount(int stock_count) { stock_count_ = stock_count; } + +private: + std::u16string isbn_; + std::u16string title_; + std::u16string type_; + std::u16string author_; + std::u16string press_; + int stock_count_; +}; + +std::istream &operator>>(std::istream &left, Book &right) { + std::string line; + std::getline(left, line); + + std::vector fields = SplitByVerticalLine(line, 6); + + right.SetIsbn(cru::ToUtf16(fields[0])); + right.SetTitle(cru::ToUtf16(fields[1])); + right.SetType(cru::ToUtf16(fields[2])); + right.SetAuthor(cru::ToUtf16(fields[3])); + right.SetPress(cru::ToUtf16(fields[4])); + right.SetStockCount(std::stoi(std::string(fields[5]))); + + return left; +} + +std::ostream &operator<<(std::ostream &left, const Book &right) { + left << cru::ToUtf8(right.GetIsbn()) << '|' << cru::ToUtf8(right.GetTitle()) + << '|' << cru::ToUtf8(right.GetType()) << '|' + << cru::ToUtf8(right.GetAuthor()) << '|' << cru::ToUtf8(right.GetPress()) + << '|' << right.GetStockCount(); + return left; +} + +void PrettyPrint(std::wostream &stream, const Book &book) { + stream << L"ISBN: " << ToWStringView(book.GetIsbn()) << L"\n"; + stream << L"标题: " << ToWStringView(book.GetTitle()) << L"\n"; + stream << L"类型: " << ToWStringView(book.GetType()) << L"\n"; + stream << L"作者: " << 
ToWStringView(book.GetAuthor()) << L"\n"; + stream << L"出版社: " << ToWStringView(book.GetPress()) << L"\n"; + stream << L"库存: " << book.GetStockCount() << L"\n"; +} + +class Vendor final { +public: + Vendor() = default; + Vendor(int id, std::u16string name, std::u16string type, + std::u16string address, std::u16string phone) + : id_(id), name_(std::move(name)), type_(std::move(type)), + address_(std::move(address)), phone_(std::move(phone)) {} + + CRU_DEFAULT_COPY(Vendor) + CRU_DEFAULT_MOVE(Vendor) + + ~Vendor() = default; + +public: + int GetId() const { return id_; } + void SetId(int id) { id_ = id; } + + std::u16string GetName() const { return name_; } + void SetName(std::u16string name) { name_ = std::move(name); } + + std::u16string GetType() const { return type_; } + void SetType(std::u16string type) { type_ = std::move(type); } + + std::u16string GetAddress() const { return address_; } + void SetAddress(std::u16string address) { address_ = std::move(address); } + + std::u16string GetPhone() const { return phone_; } + void SetPhone(std::u16string phone) { phone_ = std::move(phone); } + +private: + int id_; + std::u16string name_; + std::u16string type_; + std::u16string address_; + std::u16string phone_; +}; + +std::istream &operator>>(std::istream &left, Vendor &right) { + std::string line; + std::getline(left, line); + + std::vector fields = SplitByVerticalLine(line, 5); + + right.SetId(std::stoi(std::string(fields[0]))); + right.SetName(cru::ToUtf16(fields[1])); + right.SetType(cru::ToUtf16(fields[2])); + right.SetAddress(cru::ToUtf16(fields[3])); + right.SetPhone(cru::ToUtf16(fields[4])); + + return left; +} + +std::ostream &operator<<(std::ostream &left, const Vendor &right) { + left << right.GetId() << '|' << cru::ToUtf8(right.GetName()) << '|' + << cru::ToUtf8(right.GetType()) << '|' << cru::ToUtf8(right.GetAddress()) + << '|' << cru::ToUtf8(right.GetPhone()); + return left; +} + +class Record final { +public: + Record(); + + CRU_DEFAULT_COPY(Record); + 
CRU_DEFAULT_MOVE(Record); + + ~Record() = default; + +public: + void WriteTo(std::ostream &stream); + void ReadFrom(std::istream &stream); + + const std::vector &GetBooks() const { return books_; } + const std::vector &GetVendors() const { return vendors_; } + + // TODO: Implementation + std::optional FindBookByIsbn(std::u16string_view isbn); + + // TODO: Implementation + void RemoveBookByIsbn(std::u16string_view isbn); + +private: + std::vector books_; + std::vector vendors_; +}; + +void Record::WriteTo(std::ostream &stream) { + stream << books_.size() << ' ' << vendors_.size() << '\n'; + for (const auto &book : books_) { + stream << book << '\n'; + } + for (const auto &vendor : vendors_) { + stream << vendor << '\n'; + } +} + +void Record::ReadFrom(std::istream &stream) { + books_.clear(); + vendors_.clear(); + int book_count, vendor_count; + stream >> book_count >> vendor_count; + stream >> std::ws; + for (int i = 0; i < book_count; i++) { + Book book; + stream >> book; + books_.push_back(std::move(book)); + } + for (int i = 0; i < vendor_count; i++) { + Vendor vendor; + stream >> vendor; + vendors_.push_back(std::move(vendor)); + } +} + +int main() { + Record record; + + while (true) { + std::wcout << L"1. 查询 2. 添加 0. 退出\n"; + int choice = 0; + std::wcin >> choice; + if (choice == 1) { + std::wcout + << L"1. 图书 2. 供应商\n输入数字选择操作,其他任意字符将退出程序。\n"; + choice = 0; + std::wcin >> choice; + if (choice == 1) { + std::wcout << L"请输入图书编号:\n"; + std::wstring isbn; + std::wcin >> isbn; + auto find_result = record.FindBookByIsbn(ToUtf16View(isbn)); + if (find_result) { + PrettyPrint(std::wcout, *find_result); + std::wcout << L"0. 返回主菜单 1. 修改 2. 
删除\n"; + choice = 0; + std::wcin >> choice; + if (choice == 1) { + // TODO: + } else if (choice == 2) { + record.RemoveBookByIsbn(ToUtf16View(isbn)); + std::wcout << L"删除成功。\n"; + } + } else { + std::wcout << L"该书不存在。\n"; + } + } else if (choice == 2) { + + } else { + return 0; + } + + } else if (choice == 2) { + + } else { + return 0; + } + } + + return 0; +} -- cgit v1.2.3