about summary refs log tree commit diff
path: root/include/cru/common/StringUtil.hpp
diff options
context:
space:
mode:
author crupest <crupest@outlook.com> 2020-07-05 23:06:02 +0800
committer crupest <crupest@outlook.com> 2020-07-05 23:06:02 +0800
commit 5c805e494425a88da1813902b1ad8a1ab351e30d (patch)
tree be3cfd96dcac19db3e256d610d48b5083c489a6c /include/cru/common/StringUtil.hpp
parent bbec59718bf8a824583869126762013112f8e568 (diff)
download cru-5c805e494425a88da1813902b1ad8a1ab351e30d.tar.gz
cru-5c805e494425a88da1813902b1ad8a1ab351e30d.tar.bz2
cru-5c805e494425a88da1813902b1ad8a1ab351e30d.zip
...
Diffstat (limited to 'include/cru/common/StringUtil.hpp')
-rw-r--r-- include/cru/common/StringUtil.hpp 79
1 files changed, 66 insertions, 13 deletions
diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp
index a44ae6b4..714f1d49 100644
--- a/include/cru/common/StringUtil.hpp
+++ b/include/cru/common/StringUtil.hpp
@@ -3,37 +3,90 @@
namespace cru {
using CodePoint = std::int32_t;
-constexpr CodePoint k_code_point_end = -1;
+constexpr CodePoint k_invalid_code_point = -1;
class TextEncodeException : public std::runtime_error {
public:
using runtime_error::runtime_error;
};
-class Utf8Iterator : public Object {
+inline bool IsSurrogatePair(char16_t c) { return c >= 0xD800 && c <= 0xDFFF; }
+
+inline bool IsSurrogatePairLeading(char16_t c) {
+ return c >= 0xD800 && c <= 0xDBFF;
+}
+
+inline bool IsSurrogatePairTrailing(char16_t c) {
+ return c >= 0xDC00 && c <= 0xDFFF;
+}
+
+class Utf16Iterator : public Object {
public:
- explicit Utf8Iterator(const std::string_view& string) : string_(string) {}
- Utf8Iterator(const std::string_view& string, Index position)
- : string_(string), position_(position) {}
+ explicit Utf16Iterator(std::u16string_view string)
+ : string_(std::move(string)) {}
+ Utf16Iterator(std::u16string_view string, Index position)
+ : string_(std::move(string)), position_(position) {}
- CRU_DEFAULT_COPY(Utf8Iterator)
- CRU_DEFAULT_MOVE(Utf8Iterator)
+ CRU_DEFAULT_COPY(Utf16Iterator)
+ CRU_DEFAULT_MOVE(Utf16Iterator)
- ~Utf8Iterator() = default;
+ ~Utf16Iterator() = default;
public:
- void SetToHead() { position_ = 0; }
+ void SetPositionToHead() { position_ = 0; }
void SetPosition(Index position) { position_ = position; }
- // Advance current position and get next code point. Return k_code_point_end
- // if there is no next code unit(point). Throw TextEncodeException if decoding
- // fails.
+ // Move the current position backward and get the previous code point. Return
+ // k_invalid_code_point if the head is reached. Throw TextEncodeException if
+ // an encoding problem is encountered.
+ CodePoint Previous();
+
+ // Advance the current position and get the next code point. Return
+ // k_invalid_code_point if the tail is reached. Throw TextEncodeException if
+ // an encoding problem is encountered.
CodePoint Next();
Index CurrentPosition() const { return this->position_; }
private:
- std::string_view string_;
+ std::u16string_view string_;
Index position_ = 0;
};
+
+Index PreviousIndex(std::u16string_view string, Index current);
+Index NextIndex(std::u16string_view string, Index current);
+
+std::string ToUtf8(const std::u16string& s);
+inline std::string ToUtf8(std::u16string_view s) {
+ return ToUtf8(std::u16string{s});
+}
+
+// class Utf8Iterator : public Object {
+// public:
+// explicit Utf8Iterator(const std::string_view& string) : string_(string) {}
+// Utf8Iterator(const std::string_view& string, Index position)
+// : string_(string), position_(position) {}
+
+// CRU_DEFAULT_COPY(Utf8Iterator)
+// CRU_DEFAULT_MOVE(Utf8Iterator)
+
+// ~Utf8Iterator() = default;
+
+// public:
+// void SetToHead() { position_ = 0; }
+// void SetPosition(Index position) { position_ = position; }
+
+// // Advance the current position and get the next code point.
+// // Return value:
+// //   k_invalid_code_point if there is no next code unit (point).
+// // Throws:
+// //   TextEncodeException if decoding fails.
+// CodePoint Next();
+
+// Index CurrentPosition() const { return this->position_; }
+
+// private:
+// std::string_view string_;
+// Index position_ = 0;
+// };
} // namespace cru