#pragma once
#include <string>
#include "Base.hpp"
namespace cru {
// A single Unicode code point. The type is signed, so negative sentinel
// values can be stored next to real code points.
using CodePoint = std::int32_t;
// Sentinel used to signal "no valid code point"; it is negative and therefore
// can never collide with a real code point. Declared `inline` so that every
// translation unit including this header shares one definition instead of
// getting its own internal-linkage copy.
inline constexpr CodePoint k_invalid_code_point = -1;
// Exception type for text encoding errors. It adds no state of its own and
// behaves exactly like std::runtime_error, so it can be caught as either type.
class TextEncodeException : public std::runtime_error {
 public:
  // Inherit every std::runtime_error constructor (message as std::string or
  // as const char*).
  using std::runtime_error::runtime_error;
};
// Returns true when `c` lies in the UTF-16 surrogate range [0xD800, 0xDFFF],
// i.e. it is either half of a surrogate pair. Usable in constant expressions.
constexpr bool IsUtf16SurrogatePairCodeUnit(char16_t c) {
  return c >= 0xD800 && c <= 0xDFFF;
}
// Returns true when `c` lies in [0xD800, 0xDBFF], the range of leading (high)
// surrogates that open a surrogate pair. Usable in constant expressions.
constexpr bool IsUtf16SurrogatePairLeading(char16_t c) {
  return c >= 0xD800 && c <= 0xDBFF;
}
// Returns true when `c` lies in [0xDC00, 0xDFFF], the range of trailing (low)
// surrogates that close a surrogate pair. Usable in constant expressions.
constexpr bool IsUtf16SurrogatePairTrailing(char16_t c) {
  return c >= 0xDC00 && c <= 0xDFFF;
}
// Declarations only; the definitions are not part of this header.
//
// NOTE(review): presumably decodes the UTF-8 code point of `str` that starts
// at index `current` and stores the index of the following code point in
// `*next_position` — confirm against the definition. CodePointIterator relies
// on the out-parameter being written with a position past `current`.
CodePoint Utf8NextCodePoint(std::string_view str, Index current,
                            Index* next_position);
// NOTE(review): presumably the UTF-16 counterpart of Utf8NextCodePoint, with
// indices counted in char16_t code units — confirm against the definition.
CodePoint Utf16NextCodePoint(std::u16string_view str, Index current,
                             Index* next_position);
// NOTE(review): presumably decodes the code point that ends just before index
// `current` and stores the index where it starts in `*previous_position` —
// confirm against the definition.
CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current,
                                 Index* previous_position);
// Pointer-to-function type for a decoder that takes a string of type
// `StringType`, an index into it and an out-pointer to an index, and returns a
// CodePoint. It is the type of CodePointIterator's second template parameter.
template <typename StringType>
using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*);
// Iterator that walks a string one code point at a time.
//
// Template parameters:
//   StringType            - string type stored by value in the iterator; it
//                           must provide size() and data().
//   NextCodePointFunction - decoder called as (string, position, &next). It
//                           returns the code point at `position` and writes
//                           the position of the following code point to
//                           `next`.
//
// An instance also acts as its own range: begin() returns a copy of the
// iterator and end() returns the past-the-end iterator for the same string, so
// it can be used directly in a range-based for loop.
//
// NOTE(review): `reference` is a value type rather than a true reference, so
// this does not strictly satisfy the C++17 forward iterator requirements even
// though `iterator_category` says forward — confirm this is intended.
template <typename StringType,
          NextCodePointFunctionType<StringType> NextCodePointFunction>
class CodePointIterator {
 public:
  using difference_type = Index;
  using value_type = CodePoint;
  using pointer = void;
  using reference = value_type;
  using iterator_category = std::forward_iterator_tag;

  // Tag selecting the constructor that creates a past-the-end iterator.
  struct past_end_tag_t {};

  // Creates an iterator positioned at the start of `string`.
  explicit CodePointIterator(StringType string)
      : string_(std::move(string)) {}

  // Creates the past-the-end iterator for `string`.
  explicit CodePointIterator(StringType string, past_end_tag_t)
      : string_(std::move(string)),
        position_(static_cast<Index>(string_.size())),
        next_position_cache_(position_) {}

  CRU_DEFAULT_COPY(CodePointIterator)
  CRU_DEFAULT_MOVE(CodePointIterator)

  ~CodePointIterator() = default;

  // Returns (a copy of) the string being iterated.
  StringType GetString() const { return string_; }

  // Returns the index of the code point the iterator currently refers to.
  Index GetPosition() const { return position_; }

  // Returns true once the iterator has reached the end of the string.
  bool IsPastEnd() const {
    return position_ == static_cast<Index>(string_.size());
  }

  // Range support: iteration starts from the current position...
  CodePointIterator begin() const { return *this; }

  // ...and stops at the end of the same string.
  CodePointIterator end() const {
    return CodePointIterator{string_, past_end_tag_t{}};
  }

  // Two iterators are equal when they are at the same position. Comparing
  // iterators over different strings is a precondition violation.
  bool operator==(const CodePointIterator& other) const {
    // You should compare iterator that iterate on the same string.
    Expects(this->string_.data() == other.string_.data() &&
            this->string_.size() == other.string_.size());
    return this->position_ == other.position_;
  }

  bool operator!=(const CodePointIterator& other) const {
    return !this->operator==(other);
  }

  // Pre-increment: advances to the next code point. Must not be called on a
  // past-the-end iterator.
  CodePointIterator& operator++() {
    Expects(!IsPastEnd());
    Forward();
    return *this;
  }

  // Post-increment: advances to the next code point and returns the iterator
  // as it was before advancing. Must not be called on a past-the-end iterator.
  CodePointIterator operator++(int) {
    Expects(!IsPastEnd());
    CodePointIterator old = *this;
    Forward();
    return old;
  }

  // Decodes the code point at the current position. The position of the next
  // code point is remembered so that a following increment does not have to
  // decode the same code point again.
  CodePoint operator*() const {
    return NextCodePointFunction(string_, position_, &next_position_cache_);
  }

 private:
  // Moves position_ to the next code point, using the cached value when a
  // preceding dereference at this position has already computed it.
  void Forward() {
    if (next_position_cache_ > position_) {
      position_ = next_position_cache_;
    } else {
      // No usable cache: decode only to learn where the next code point
      // starts. position_ is passed by value, so writing the result back into
      // it is safe.
      NextCodePointFunction(string_, position_, &position_);
    }
  }

  StringType string_;
  Index position_ = 0;
  // Next position computed by the last dereference. It is only meaningful
  // while it is greater than position_; it starts out equal to position_
  // ("nothing cached") so Forward() never reads an indeterminate value.
  mutable Index next_position_cache_ = 0;
};
// Code point iterator over a std::string_view, decoding with
// Utf8NextCodePoint.
using Utf8CodePointIterator =
    CodePointIterator<std::string_view, &Utf8NextCodePoint>;
// Code point iterator over a std::u16string_view, decoding with
// Utf16NextCodePoint.
using Utf16CodePointIterator =
    CodePointIterator<std::u16string_view, &Utf16NextCodePoint>;
// Declarations only; the definitions are not part of this header.
//
// NOTE(review): presumably appends the UTF-8 encoding of `code_point` to
// `str` — confirm against the definition (including how an invalid code point
// is reported).
void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str);
// NOTE(review): presumably appends the UTF-16 encoding of `code_point` to
// `str` — confirm against the definition.
void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str);
// NOTE(review): presumably converts the UTF-16 text `s` to UTF-8 — confirm.
std::string ToUtf8(std::u16string_view s);
// NOTE(review): presumably converts the UTF-8 text `s` to UTF-16 — confirm.
std::u16string ToUtf16(std::string_view s);
// If `s` is not a valid UTF-16 string, the return value is undefined.
// NOTE(review): `position` is typed gsl::index while the rest of this header
// uses `Index`; presumably the same type — confirm before unifying.
bool Utf16IsValidInsertPosition(std::u16string_view s, gsl::index position);
}  // namespace cru