// File: include/cru/common/StringUtil.hpp
// (blob 714f1d49ad4adccff7c3a4f39895c45a72289300)
#pragma once
#include "Base.hpp"

namespace cru {
// A Unicode code point. A signed 32-bit integer is used so that negative
// values remain available as sentinels.
using CodePoint = std::int32_t;

// Sentinel meaning "no code point". `inline` gives the constant a single
// definition shared by every translation unit that includes this header,
// instead of one internal-linkage copy per translation unit.
inline constexpr CodePoint k_invalid_code_point = -1;

// Exception type for text encoding/decoding failures. It adds no state of its
// own: the message given at the throw site is all it carries.
class TextEncodeException : public std::runtime_error {
 public:
  // Expose every std::runtime_error constructor unchanged.
  using std::runtime_error::runtime_error;
};

// Returns true when the UTF-16 code unit `c` is a surrogate of either kind,
// i.e. lies in [0xD800, 0xDFFF]. Note that it inspects a single code unit,
// not a pair.
inline bool IsSurrogatePair(char16_t c) {
  // Every surrogate, and nothing else, has the top five bits 11011.
  return (c & 0xF800) == 0xD800;
}

// Returns true when the UTF-16 code unit `c` is a leading (high) surrogate,
// i.e. lies in [0xD800, 0xDBFF].
inline bool IsSurrogatePairLeading(char16_t c) {
  // Leading surrogates are exactly the code units whose top six bits are
  // 110110.
  return (c & 0xFC00) == 0xD800;
}

// Returns true when the UTF-16 code unit `c` is a trailing (low) surrogate,
// i.e. lies in [0xDC00, 0xDFFF].
inline bool IsSurrogatePairTrailing(char16_t c) {
  // Trailing surrogates are exactly the code units whose top six bits are
  // 110111.
  return (c & 0xFC00) == 0xDC00;
}

// Steps through a UTF-16 string one code point at a time, forwards or
// backwards, while tracking a current position.
//
// The text is held as a std::u16string_view, so the iterator does not own it:
// the caller must keep the underlying buffer alive while the iterator is used.
class Utf16Iterator : public Object {
 public:
  // Creates an iterator over `string` positioned at index 0.
  explicit Utf16Iterator(std::u16string_view string) : string_(string) {}

  // Creates an iterator over `string` positioned at `position`. The position
  // is stored as given; no validation happens here.
  // NOTE(review): presumably `position` is a UTF-16 code-unit offset -- confirm
  // against the definitions of Previous()/Next().
  //
  // The view is copied rather than std::move'd: std::u16string_view is
  // trivially copyable, so a move would be an identical copy.
  Utf16Iterator(std::u16string_view string, Index position)
      : string_(string), position_(position) {}

  CRU_DEFAULT_COPY(Utf16Iterator)
  CRU_DEFAULT_MOVE(Utf16Iterator)

  ~Utf16Iterator() = default;

 public:
  // Resets the current position to the start of the string.
  void SetPositionToHead() { position_ = 0; }

  // Sets the current position to `position` (stored without validation).
  void SetPosition(Index position) { position_ = position; }

  // Moves the current position backward and returns the previous code point.
  // Returns k_invalid_code_point if the head has been reached. Throws
  // TextEncodeException if an encoding problem is encountered.
  CodePoint Previous();

  // Advances the current position and returns the next code point. Returns
  // k_invalid_code_point if the tail has been reached. Throws
  // TextEncodeException if an encoding problem is encountered.
  CodePoint Next();

  // Returns the current position within the string.
  Index CurrentPosition() const { return position_; }

 private:
  std::u16string_view string_;  // Non-owning view of the text being iterated.
  Index position_ = 0;          // Current position; starts at the head.
};

// NOTE(review): both functions are defined outside this header. Judging by
// their names and signatures, they presumably return the index of the code
// point boundary just before / just after `current` in the UTF-16 `string` --
// confirm against the implementation, including the behavior at the head and
// tail of the string and on malformed surrogates.
Index PreviousIndex(std::u16string_view string, Index current);
Index NextIndex(std::u16string_view string, Index current);

// Defined out of line. Presumably converts the UTF-16 text `s` to a UTF-8
// encoded std::string -- confirm error behavior against the implementation.
std::string ToUtf8(const std::u16string& s);

// Convenience overload for views: copies the viewed characters into an owning
// std::u16string and hands that to the std::u16string overload.
inline std::string ToUtf8(std::u16string_view s) {
  const std::u16string owned(s);
  return ToUtf8(owned);
}

// class Utf8Iterator : public Object {
//  public:
//   explicit Utf8Iterator(const std::string_view& string) : string_(string) {}
//   Utf8Iterator(const std::string_view& string, Index position)
//       : string_(string), position_(position) {}

//   CRU_DEFAULT_COPY(Utf8Iterator)
//   CRU_DEFAULT_MOVE(Utf8Iterator)

//   ~Utf8Iterator() = default;

//  public:
//   void SetToHead() { position_ = 0; }
//   void SetPosition(Index position) { position_ = position; }

//   // Advance current position and get next code point. Return
//   // k_invalid_code_point if there is no next code unit (point). Throw
//   // TextEncodeException if decoding fails.
//   CodePoint Next();

//   Index CurrentPosition() const { return this->position_; }

//  private:
//   std::string_view string_;
//   Index position_ = 0;
// };
}  // namespace cru