about | summary | refs | log | tree | commit | diff
path: root/include/cru/common/StringUtil.hpp
diff options
context:
space:
mode:
author crupest <crupest@outlook.com> 2020-07-07 00:31:11 +0800
committer crupest <crupest@outlook.com> 2020-07-07 00:31:11 +0800
commit 3bf5b1fcf2315a1ce180ad69eb6bb1e57be37ca5 (patch)
tree 722b5b5d808ce0ead4bc497dd910e081fa601656 /include/cru/common/StringUtil.hpp
parent cb241b7289abfc10111d3180def55ca1fbb2edb4 (diff)
download cru-3bf5b1fcf2315a1ce180ad69eb6bb1e57be37ca5.tar.gz
cru-3bf5b1fcf2315a1ce180ad69eb6bb1e57be37ca5.tar.bz2
cru-3bf5b1fcf2315a1ce180ad69eb6bb1e57be37ca5.zip
...
Diffstat (limited to 'include/cru/common/StringUtil.hpp')
-rw-r--r-- include/cru/common/StringUtil.hpp 115
1 files changed, 89 insertions, 26 deletions
diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp
index 714f1d49..b8edc302 100644
--- a/include/cru/common/StringUtil.hpp
+++ b/include/cru/common/StringUtil.hpp
@@ -10,51 +10,114 @@ class TextEncodeException : public std::runtime_error {
using runtime_error::runtime_error;
};
-inline bool IsSurrogatePair(char16_t c) { return c >= 0xD800 && c <= 0xDFFF; }
+inline bool IsUtf16SurrogatePairCodeUnit(char16_t c) {
+ return c >= 0xD800 && c <= 0xDFFF;
+}
-inline bool IsSurrogatePairLeading(char16_t c) {
+inline bool IsUtf16SurrogatePairLeading(char16_t c) {
return c >= 0xD800 && c <= 0xDBFF;
}
-inline bool IsSurrogatePairTrailing(char16_t c) {
+inline bool IsUtf16SurrogatePairTrailing(char16_t c) {
return c >= 0xDC00 && c <= 0xDFFF;
}
-class Utf16Iterator : public Object {
+CodePoint Utf8NextCodePoint(std::string_view str, Index current,
+ Index* next_position);
+
+CodePoint Utf16NextCodePoint(std::u16string_view str, Index current,
+ Index* next_position);
+CodePoint Utf16PreviousCodePoint(std::u16string_view str, Index current,
+ Index* previous_position);
+
+template <typename StringType>
+using NextCodePointFunctionType = CodePoint (*)(StringType, Index, Index*);
+
+template <typename StringType,
+ NextCodePointFunctionType<StringType> NextCodePointFunction>
+class CodePointIterator {
+ public:
+ using difference_type = Index;
+ using value_type = CodePoint;
+ using pointer = void;
+ using reference = value_type;
+ using iterator_category = std::forward_iterator_tag;
+
public:
- explicit Utf16Iterator(std::u16string_view string)
- : string_(std::move(string)) {}
- Utf16Iterator(std::u16string_view string, Index position)
- : string_(std::move(string)), position_(position) {}
+ struct past_end_tag_t {};
- CRU_DEFAULT_COPY(Utf16Iterator)
- CRU_DEFAULT_MOVE(Utf16Iterator)
+ // Creates an iterator positioned at index 0 of `string`.
+ // next_position_cache_ is set to 0 so that Forward() never reads an
+ // indeterminate value when ++ is applied before the first dereference;
+ // a cache value that is not greater than position_ makes Forward()
+ // recompute the next position instead of trusting the cache.
+ explicit CodePointIterator(StringType string)
+ : string_(std::move(string)), position_(0), next_position_cache_(0) {}
+ // Creates the past-the-end iterator, whose position equals the string size.
+ // string_ is declared before position_, so it is already initialized when
+ // its size is read here. The cast mirrors the one used in IsPastEnd().
+ explicit CodePointIterator(StringType string, past_end_tag_t)
+ : string_(std::move(string)),
+ position_(static_cast<Index>(string_.size())),
+ next_position_cache_(0) {}
- ~Utf16Iterator() = default;
+ CRU_DEFAULT_COPY(CodePointIterator)
+ CRU_DEFAULT_MOVE(CodePointIterator)
+
+ ~CodePointIterator() = default;
public:
- void SetPositionToHead() { position_ = 0; }
- void SetPosition(Index position) { position_ = position; }
+ StringType GetString() const { return string_; }
+ Index GetPosition() const { return position_; }
+
+ bool IsPastEnd() const {
+ return position_ == static_cast<Index>(string_.size());
+ }
- // Backward current position and get previous code point. Return
- // k_invalid_code_point if reach head. Throw TextEncodeException if encounter
- // encoding problem.
- CodePoint Previous();
+ public:
+ CodePointIterator begin() const { return *this; }
+ CodePointIterator end() const {
+ return CodePointIterator{string_, past_end_tag_t{}};
+ }
- // Advance current position and get next code point. Return
- // k_invalid_code_point if reach tail. Throw TextEncodeException if encounter
- // encoding problem.
- CodePoint Next();
+ public:
+ bool operator==(const CodePointIterator& other) const {
+ // Only iterators over the same string may be compared with each other.
+ Expects(this->string_.data() == other.string_.data() &&
+ this->string_.size() == other.string_.size());
+ return this->position_ == other.position_;
+ }
+ bool operator!=(const CodePointIterator& other) const {
+ return !this->operator==(other);
+ }
+
+ CodePointIterator& operator++() {
+ Expects(!IsPastEnd());
+ Forward();
+ return *this;
+ }
+
+ CodePointIterator operator++(int) {
+ Expects(!IsPastEnd());
+ CodePointIterator old = *this;
+ Forward();
+ return old;
+ }
+
+ CodePoint operator*() const {
+ return NextCodePointFunction(string_, position_, &next_position_cache_);
+ }
- Index CurrentPosition() const { return this->position_; }
+ private:
+ void Forward() {
+ if (next_position_cache_ > position_) {
+ position_ = next_position_cache_;
+ } else {
+ NextCodePointFunction(string_, position_, &position_);
+ }
+ }
private:
- std::u16string_view string_;
- Index position_ = 0;
+ StringType string_;
+ Index position_;
+ mutable Index next_position_cache_;
};
-Index PreviousIndex(std::u16string_view string, Index current);
-Index NextIndex(std::u16string_view string, Index current);
+using Utf8CodePointIterator =
+ CodePointIterator<std::string_view, &Utf8NextCodePoint>;
+
+using Utf16CodePointIterator =
+ CodePointIterator<std::u16string_view, &Utf16NextCodePoint>;
std::string ToUtf8(const std::u16string& s);
inline std::string ToUtf8(std::u16string_view s) {