aboutsummaryrefslogtreecommitdiff
path: root/works/life/computer-network-experiment/StringUtil.hpp
blob: b0ca675144b5b67d446a132662b51349e9049655 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#pragma once
#include "Base.hpp"

#include <functional>
#include <string>
#include <string_view>

namespace cru {
// A Unicode code point value. Signed so that negative values can serve as
// sentinels.
using CodePoint = std::int32_t;
// Sentinel that is not a real code point. `inline` gives the constant a single
// definition shared by every translation unit that includes this header,
// instead of one internal-linkage copy per translation unit.
inline constexpr CodePoint k_invalid_code_point = -1;

// Exception type named for text-encoding failures. It adds no state on top of
// std::runtime_error, so what() yields the message passed at construction.
class TextEncodeException : public std::runtime_error {
 public:
  // The same two message-taking constructors that std::runtime_error provides.
  explicit TextEncodeException(const std::string& what_arg)
      : std::runtime_error(what_arg) {}
  explicit TextEncodeException(const char* what_arg)
      : std::runtime_error(what_arg) {}
};

// True when `c` is any UTF-16 surrogate code unit, i.e. lies in
// [0xD800, 0xDFFF] (leading or trailing).
inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) {
  // Every value in that range, and no other 16-bit value, has the top five
  // bits 11011.
  constexpr unsigned kTopFiveBits = 0xF800u;
  constexpr unsigned kSurrogatePrefix = 0xD800u;
  return (static_cast<unsigned>(c) & kTopFiveBits) == kSurrogatePrefix;
}

// True when `c` is a leading (high) surrogate, i.e. lies in [0xD800, 0xDBFF].
inline bool IsUtf16SurrogatePairLeading(char16_t c) {
  // Every value in that range, and no other 16-bit value, has the top six
  // bits 110110.
  constexpr unsigned kTopSixBits = 0xFC00u;
  constexpr unsigned kLeadingPrefix = 0xD800u;
  return (static_cast<unsigned>(c) & kTopSixBits) == kLeadingPrefix;
}

// True when `c` is a trailing (low) surrogate, i.e. lies in [0xDC00, 0xDFFF].
inline bool IsUtf16SurrogatePairTrailing(char16_t c) {
  // Every value in that range, and no other 16-bit value, has the top six
  // bits 110111.
  constexpr unsigned kTopSixBits = 0xFC00u;
  constexpr unsigned kTrailingPrefix = 0xDC00u;
  return (static_cast<unsigned>(c) & kTopSixBits) == kTrailingPrefix;
}

// Decodes one code point from the UTF-8 string `str` starting at index
// `current`, and reports through `next_position` the index at which the
// following code point starts (CodePointIterator in this header advances to
// exactly that index).
// NOTE(review): defined out of line. Presumably returns k_invalid_code_point
// when `current` is at the end and throws TextEncodeException on malformed
// input; whether `next_position` may be null is not visible here — confirm
// against the implementation.
CodePoint Utf8NextCodePoint(std::string_view str, Index current,
                            Index* next_position);

// Decodes one code point from the UTF-16 string `str` starting at code-unit
// index `current`, and reports through `next_position` the index at which the
// following code point starts (CodePointIterator in this header advances to
// exactly that index).
// NOTE(review): defined out of line. End-of-input and malformed-surrogate
// behavior, and whether `next_position` may be null, are not visible here —
// confirm against the implementation.
CodePoint Utf16NextCodePoint(std::u16string_view str, Index current,
                             Index* next_position);
// Counterpart of Utf16NextCodePoint for walking toward the start of `str`.
// NOTE(review): presumably decodes the code point that ends just before
// `current` and writes the index where it begins to `previous_position` —
// confirm against the implementation.
CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current,
                                 Index* previous_position);

// Function-pointer type shared by the "next code point" decoders: takes the
// string, the current index and an out-pointer for the next index, and returns
// the decoded code point. Used as the non-type template parameter of
// CodePointIterator.
template <typename StringType>
using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*);

// Iterator that walks `StringType` (a string-view-like type) one code point at
// a time, using `NextCodePointFunction` to decode.
//
// The object doubles as its own range: begin() returns a copy of the iterator
// and end() returns a past-the-end iterator over the same string, so it can be
// used directly in a range-for loop.
//
// Dereferencing decodes the code point at the current position and remembers
// the "next position" the decoder reported, so that a following increment does
// not have to decode the same code point again.
template <typename StringType,
          NextCodePointFunctionType<StringType> NextCodePointFunction>
class CodePointIterator {
 public:
  using difference_type = Index;
  using value_type = CodePoint;
  using pointer = void;
  using reference = value_type;
  using iterator_category = std::forward_iterator_tag;

 public:
  // Tag selecting the constructor that creates a past-the-end iterator.
  struct past_end_tag_t {};

  // Creates an iterator positioned at the start of `string`.
  explicit CodePointIterator(StringType string)
      : string_(std::move(string)), position_(0) {}
  // Creates a past-the-end iterator for `string`. `string_` is declared before
  // `position_`, so it is already initialized when its size is read here.
  explicit CodePointIterator(StringType string, past_end_tag_t)
      : string_(std::move(string)),
        position_(static_cast<Index>(string_.size())) {}

  CRU_DEFAULT_COPY(CodePointIterator)
  CRU_DEFAULT_MOVE(CodePointIterator)

  ~CodePointIterator() = default;

 public:
  // The string being iterated.
  StringType GetString() const { return string_; }
  // Index of the code unit at which the current code point starts.
  Index GetPosition() const { return position_; }

  // True when the iterator has reached the end of the string.
  bool IsPastEnd() const {
    return position_ == static_cast<Index>(string_.size());
  }

 public:
  CodePointIterator begin() const { return *this; }
  CodePointIterator end() const {
    return CodePointIterator{string_, past_end_tag_t{}};
  }

 public:
  // Compares positions. Both iterators must refer to the same string (same
  // data pointer and size); this is enforced with Expects.
  bool operator==(const CodePointIterator& other) const {
    // You should compare iterator that iterate on the same string.
    Expects(this->string_.data() == other.string_.data() &&
            this->string_.size() == other.string_.size());
    return this->position_ == other.position_;
  }
  bool operator!=(const CodePointIterator& other) const {
    return !this->operator==(other);
  }

  // Pre-increment: moves to the next code point. Must not be past the end.
  CodePointIterator& operator++() {
    Expects(!IsPastEnd());
    Forward();
    return *this;
  }

  // Post-increment: moves to the next code point and returns the previous
  // iterator value. Must not be past the end.
  CodePointIterator operator++(int) {
    Expects(!IsPastEnd());
    CodePointIterator old = *this;
    Forward();
    return old;
  }

  // Decodes the code point at the current position and caches the position of
  // the following one for a later increment.
  CodePoint operator*() const {
    return NextCodePointFunction(string_, position_, &next_position_cache_);
  }

 private:
  // Advances position_ by one code point. The cache is usable only when it is
  // strictly ahead of position_; otherwise the decoder is asked for the next
  // position.
  void Forward() {
    if (next_position_cache_ > position_) {
      position_ = next_position_cache_;
    } else {
      NextCodePointFunction(string_, position_, &position_);
    }
  }

 private:
  StringType string_;
  Index position_;
  // Next position reported by the last dereference. Starts at 0, which is
  // never greater than position_ and therefore means "nothing cached".
  // Without this initializer, incrementing before the first dereference would
  // read an indeterminate value in Forward().
  mutable Index next_position_cache_ = 0;
};

// Code point iterator over a std::string_view, decoding with
// Utf8NextCodePoint.
using Utf8CodePointIterator =
    CodePointIterator<std::string_view, &Utf8NextCodePoint>;

// Code point iterator over a std::u16string_view, decoding with
// Utf16NextCodePoint.
using Utf16CodePointIterator =
    CodePointIterator<std::u16string_view, &Utf16NextCodePoint>;

// Encode `code_point` and append the result to `str` (UTF-8 and UTF-16
// variants respectively).
// NOTE(review): defined out of line; behavior for a code point that cannot be
// encoded (presumably a TextEncodeException) is not visible here — confirm.
void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str);
void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str);

// Convert a whole string between UTF-16 and UTF-8, returning a new string.
// NOTE(review): defined out of line; handling of malformed input (presumably a
// TextEncodeException) is not visible here — confirm.
std::string ToUtf8(std::u16string_view s);
std::u16string ToUtf16(std::string_view s);

// Wide-string overloads, declared only when WIN32 is defined.
// NOTE(review): compilers predefine _WIN32, not WIN32; presumably WIN32 is
// supplied by the build system or the Windows headers — confirm.
#ifdef WIN32
// Converts a wide string to UTF-8.
// NOTE(review): presumably treats wchar_t data as UTF-16 — confirm.
std::string ToUtf8(std::wstring_view s);
// Converts a UTF-8 string to a std::wstring.
// NOTE(review): by its name the result holds UTF-16 code units — confirm.
std::wstring ToUtf16WString(std::string_view s);
#endif

// Tells whether `position` is a valid place to insert text into `s`.
// If `s` is not a valid UTF-16 string, the return value is undefined.
// NOTE(review): "valid" presumably means the position does not split a
// surrogate pair and lies within [0, s.size()] — confirm against the
// implementation.
bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position);

// Returns the position just after the character for which `predicate` returns
// true, or 0 if no character satisfies it.
// NOTE(review): presumably scans from `position` toward the start of `str`,
// one code point at a time, and stops at the first match — confirm.
gsl::index Utf16BackwardUntil(std::u16string_view str, gsl::index position,
                              const std::function<bool(CodePoint)>& predicate);
// Returns the position just before the character for which `predicate` returns
// true, or str.size() if no character satisfies it.
// NOTE(review): presumably scans from `position` toward the end of `str`, one
// code point at a time, and stops at the first match — confirm.
gsl::index Utf16ForwardUntil(std::u16string_view str, gsl::index position,
                             const std::function<bool(CodePoint)>& predicate);

// Word-wise navigation in a UTF-16 string relative to `position`.
// `is_space` is an optional out-parameter; it defaults to nullptr.
// NOTE(review): presumably these return the boundary of the previous/next
// word and set `*is_space` to whether the run that was skipped consisted of
// whitespace — the exact word-boundary rules are not visible here; confirm
// against the implementation.
gsl::index Utf16PreviousWord(std::u16string_view str, gsl::index position,
                             bool* is_space = nullptr);
gsl::index Utf16NextWord(std::u16string_view str, gsl::index position,
                         bool* is_space = nullptr);

// Case conversion for a single UTF-16 code unit and for a whole UTF-16 string
// (the string overloads return a new string).
// NOTE(review): which characters are mapped (ASCII only vs. locale/Unicode
// aware) is not visible here — confirm against the implementation.
char16_t ToLower(char16_t c);
char16_t ToUpper(char16_t c);
std::u16string ToLower(std::u16string_view s);
std::u16string ToUpper(std::u16string_view s);
}  // namespace cru