1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
#pragma once
#include "Base.hpp"
namespace cru {
// Integer type used throughout this header to hold one code point. It is
// signed, which leaves room for the negative sentinel declared below.
using CodePoint = std::int32_t;

// Sentinel value that is not a real code point (it is negative).
// NOTE(review): presumably what the decoders below return for malformed
// input or at the end of the string; confirm against their definitions.
//
// Declared `inline` so that every translation unit including this header
// refers to one and the same object instead of getting its own
// internal-linkage copy.
inline constexpr CodePoint k_invalid_code_point = -1;
// Distinct exception type derived from std::runtime_error. It adds no state
// or behaviour of its own and simply re-exposes every base-class constructor,
// so it is built from a message exactly like std::runtime_error.
// NOTE(review): judging by the name it is meant for text encoding/decoding
// failures; the throw sites are not visible in this header.
class TextEncodeException : public std::runtime_error {
 public:
  using std::runtime_error::runtime_error;
};
// Returns true when `c` is any UTF-16 surrogate code unit, i.e. lies in
// [0xD800, 0xDFFF]. Every value in that range, and no other 16-bit value,
// has 0b11011 as its five most significant bits, so a single mask compare
// is enough.
inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) {
  constexpr unsigned kTopFiveBits = 0xF800u;
  constexpr unsigned kSurrogateTag = 0xD800u;
  return (static_cast<unsigned>(c) & kTopFiveBits) == kSurrogateTag;
}
// Returns true when `c` is a leading (high) surrogate, i.e. lies in
// [0xD800, 0xDBFF]. Those are exactly the 16-bit values whose six most
// significant bits are 0b110110.
inline bool IsUtf16SurrogatePairLeading(char16_t c) {
  constexpr unsigned kTopSixBits = 0xFC00u;
  constexpr unsigned kLeadingTag = 0xD800u;
  return (static_cast<unsigned>(c) & kTopSixBits) == kLeadingTag;
}
// Returns true when `c` is a trailing (low) surrogate, i.e. lies in
// [0xDC00, 0xDFFF]. Those are exactly the 16-bit values whose six most
// significant bits are 0b110111.
inline bool IsUtf16SurrogatePairTrailing(char16_t c) {
  constexpr unsigned kTopSixBits = 0xFC00u;
  constexpr unsigned kTrailingTag = 0xDC00u;
  return (static_cast<unsigned>(c) & kTopSixBits) == kTrailingTag;
}
// Code point decoders. Only the declarations live in this header; the
// definitions are elsewhere.
//
// CodePointIterator (below) calls the "Next" functions with its current
// position as `current` and adopts the value written through `next_position`
// as its new position, so that out-parameter is where iteration continues.
// NOTE(review): behaviour for malformed input, for `current` at the end of
// the string, and whether `next_position` may be null is not visible here;
// confirm against the implementation (k_invalid_code_point and
// TextEncodeException look like the error channels).

// Decodes from UTF-8 text `str` starting at index `current`.
CodePoint Utf8NextCodePoint(std::string_view str, Index current,
Index* next_position);
// Decodes from UTF-16 text `str` starting at index `current`.
CodePoint Utf16NextCodePoint(std::u16string_view str, Index current,
Index* next_position);
// Backward counterpart for UTF-16: reports a `previous_position` instead of a
// next one. NOTE(review): presumably decodes the code point that ends just
// before `current`; confirm against the implementation.
CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current,
Index* previous_position);
// Pointer-to-function type of a decoder over `StringType`: it takes the
// string, an Index and an Index* out-parameter and returns a CodePoint.
// Utf8NextCodePoint and Utf16NextCodePoint have this shape for
// std::string_view and std::u16string_view respectively. Used as the type of
// CodePointIterator's non-type template parameter.
template <typename StringType>
using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*);
// Iterator that walks a string one code point at a time.
//
// Template parameters:
//   StringType            - string type stored by value inside the iterator
//                           (the aliases in this header use std::string_view
//                           and std::u16string_view). It must provide size()
//                           and data().
//   NextCodePointFunction - decoder invoked as
//                           (string, position, &next_position); its return
//                           value is the code point at `position` and the
//                           value it writes is the position to continue from.
//
// The object also acts as its own range: begin() returns a copy of the
// iterator and end() returns a past-the-end iterator over the same string, so
// it can be used directly in a range-based for loop.
template <typename StringType,
          NextCodePointFunctionType<StringType> NextCodePointFunction>
class CodePointIterator {
 public:
  using difference_type = Index;
  using value_type = CodePoint;
  using pointer = void;
  // Dereferencing yields the decoded code point by value, not a reference.
  using reference = value_type;
  using iterator_category = std::forward_iterator_tag;

 public:
  // Tag selecting the constructor that builds a past-the-end iterator.
  struct past_end_tag_t {};

  // Creates an iterator positioned at the start (index 0) of `string`.
  explicit CodePointIterator(StringType string)
      : string_(std::move(string)), position_(0) {}

  // Creates an iterator positioned one past the last element of `string`.
  // string_ is declared before position_, so it is already initialized when
  // its size is read here.
  explicit CodePointIterator(StringType string, past_end_tag_t)
      : string_(std::move(string)),
        position_(static_cast<Index>(string_.size())) {}

  CRU_DEFAULT_COPY(CodePointIterator)
  CRU_DEFAULT_MOVE(CodePointIterator)

  ~CodePointIterator() = default;

 public:
  // Returns a copy of the string being iterated.
  StringType GetString() const { return string_; }

  // Returns the index, in elements of the string, of the current position.
  Index GetPosition() const { return position_; }

  // Returns true once the position has reached the size of the string.
  bool IsPastEnd() const {
    return position_ == static_cast<Index>(string_.size());
  }

 public:
  // Range interface: iteration starts from the current state of this object.
  CodePointIterator begin() const { return *this; }

  // Range interface: a past-the-end iterator over the same string.
  CodePointIterator end() const {
    return CodePointIterator{string_, past_end_tag_t{}};
  }

 public:
  // Two iterators are equal when they are at the same position. Comparing
  // iterators over different strings is a precondition violation (checked by
  // Expects on the data pointer and the size).
  bool operator==(const CodePointIterator& other) const {
    // You should compare iterator that iterate on the same string.
    Expects(this->string_.data() == other.string_.data() &&
            this->string_.size() == other.string_.size());
    return this->position_ == other.position_;
  }

  bool operator!=(const CodePointIterator& other) const {
    return !this->operator==(other);
  }

  // Pre-increment: moves to the next code point. Must not be called on a
  // past-the-end iterator.
  CodePointIterator& operator++() {
    Expects(!IsPastEnd());
    Forward();
    return *this;
  }

  // Post-increment: moves to the next code point and returns the previous
  // state. Must not be called on a past-the-end iterator.
  CodePointIterator operator++(int) {
    Expects(!IsPastEnd());
    CodePointIterator old = *this;
    Forward();
    return old;
  }

  // Decodes and returns the code point at the current position. The position
  // reported by the decoder is remembered so that a following increment does
  // not have to decode the same code point again.
  CodePoint operator*() const {
    return NextCodePointFunction(string_, position_, &next_position_cache_);
  }

 private:
  // Advances position_ by one code point.
  void Forward() {
    if (next_position_cache_ > position_) {
      // A dereference at the current position already computed the target.
      position_ = next_position_cache_;
    } else {
      // No usable cache: run the decoder only for the position it reports.
      NextCodePointFunction(string_, position_, &position_);
    }
  }

 private:
  StringType string_;
  Index position_;
  // Position reported by the most recent dereference. It is only trusted
  // while it is greater than position_. It must start with a defined value:
  // previously it was left uninitialized, so incrementing an iterator that
  // had never been dereferenced read an indeterminate value. Zero is never
  // greater than a valid position, so a fresh iterator has an empty cache.
  mutable Index next_position_cache_ = 0;
};
// Ready-made iterator types: CodePointIterator over a std::string_view
// decoded with Utf8NextCodePoint, and over a std::u16string_view decoded with
// Utf16NextCodePoint. Both store a view, not an owning string, so the
// underlying characters must stay alive while the iterator is in use.
using Utf8CodePointIterator =
CodePointIterator<std::string_view, &Utf8NextCodePoint>;
using Utf16CodePointIterator =
CodePointIterator<std::u16string_view, &Utf16NextCodePoint>;
// Encoders taking a code point and a mutable string reference. Only the
// declarations live in this header.
// NOTE(review): by their names they append the UTF-8 / UTF-16 encoding of
// `code_point` to `str`; how an invalid code point is reported is not visible
// here (TextEncodeException looks like the likely channel). Confirm against
// the implementation.
void Utf8EncodeCodePointAppend(CodePoint code_point, std::string& str);
void Utf16EncodeCodePointAppend(CodePoint code_point, std::u16string& str);
// Whole-string conversions: ToUtf8 takes a std::u16string_view and returns a
// new std::string; ToUtf16 takes a std::string_view and returns a new
// std::u16string. Only the declarations live in this header.
// NOTE(review): presumably UTF-16 -> UTF-8 and UTF-8 -> UTF-16 transcoding;
// the handling of malformed input is not visible here. Confirm against the
// implementation.
std::string ToUtf8(std::u16string_view s);
std::u16string ToUtf16(std::string_view s);
} // namespace cru
|