aboutsummaryrefslogtreecommitdiff
path: root/src/base/String.cpp
diff options
context:
space:
mode:
author: crupest <crupest@outlook.com> 2024-10-06 13:57:39 +0800
committer: crupest <crupest@outlook.com> 2024-10-06 13:57:39 +0800
commit: dfe62dcf8bcefc523b466e127c3edc4dc2756629 (patch)
tree: 1c751a14ba0da07ca2ff805633f97568060aa4c9 /src/base/String.cpp
parent: f51eb955e188858272230a990565931e7403f23b (diff)
downloadcru-dfe62dcf8bcefc523b466e127c3edc4dc2756629.tar.gz
cru-dfe62dcf8bcefc523b466e127c3edc4dc2756629.tar.bz2
cru-dfe62dcf8bcefc523b466e127c3edc4dc2756629.zip
Rename common to base.
Diffstat (limited to 'src/base/String.cpp')
-rw-r--r-- src/base/String.cpp 672
1 files changed, 672 insertions, 0 deletions
diff --git a/src/base/String.cpp b/src/base/String.cpp
new file mode 100644
index 00000000..47b64373
--- /dev/null
+++ b/src/base/String.cpp
@@ -0,0 +1,672 @@
+#include "cru/base/String.h"
+
+#include "cru/base/Buffer.h"
+#include "cru/base/Exception.h"
+#include "cru/base/StringToNumberConverter.h"
+#include "cru/base/StringUtil.h"
+
+#include <double-conversion/double-conversion.h>
+#include <double-conversion/string-to-double.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <string_view>
+
+namespace cru {
+// Counts the elements of a zero-terminated sequence, not including the
+// terminator. `str` must point at a sequence that ends with a zero element.
+template <typename C>
+Index GetStrSize(const C* str) {
+  const C* cursor = str;
+  while (*cursor) {
+    ++cursor;
+  }
+  return cursor - str;
+}
+
+// Converts a zero-terminated UTF-8 string; the byte length is measured with
+// GetStrSize before forwarding to the sized overload.
+String String::FromUtf8(const char* str) {
+  const Index length = GetStrSize(str);
+  return FromUtf8(str, length);
+}
+
+// Walks the code points of the `size` UTF-8 bytes at `str` and appends each
+// one, re-encoded as UTF-16 code units, to the returned string.
+String String::FromUtf8(const char* str, Index size) {
+  String utf16;
+  auto append_unit = [&utf16](char16_t unit) { utf16.push_back(unit); };
+
+  Utf8CodePointIterator code_points(str, size);
+  for (auto code_point : code_points) {
+    Utf16EncodeCodePointAppend(code_point, append_unit);
+  }
+  return utf16;
+}
+
+// std::byte overload: views the bytes as chars and forwards to the
+// (const char*, Index) overload.
+String String::FromUtf8(const std::byte* str, Index size) {
+  const auto* chars = reinterpret_cast<const char*>(str);
+  return String::FromUtf8(chars, size);
+}
+
+// Converts the used part of `buffer` (GetUsedBeginPtr / GetUsedSize) as UTF-8.
+String String::FromUtf8(const Buffer& buffer) {
+  const auto used_size = buffer.GetUsedSize();
+  return String::FromUtf8(buffer.GetUsedBeginPtr(), used_size);
+}
+
+// Converts a filesystem path by feeding path.string() to FromUtf8.
+// NOTE(review): path.string() returns the native narrow encoding, which is
+// presumably not UTF-8 on every platform - confirm this is intended.
+String String::FromStdPath(const std::filesystem::path& path) {
+  const auto narrow = path.string();
+  return String::FromUtf8(narrow);
+}
+
+// Shared one-element buffer holding only the terminating zero. Code in this
+// file compares buffer_ against it before calling delete[], and moved-from
+// strings are pointed at it.
+char16_t String::kEmptyBuffer[1] = {0};
+
+// Builds a string from a zero-terminated sequence; the length comes from
+// GetStrSize.
+String::String(const_pointer str) : String(str, GetStrSize(str)) {}
+
+// Copies `size` code units from `str` into freshly allocated storage that has
+// one extra slot for the terminating zero. Capacity equals `size` afterwards.
+String::String(const_pointer str, Index size) {
+  pointer storage = new value_type[size + 1];
+  std::memcpy(storage, str, size * sizeof(char16_t));
+  storage[size] = 0;
+
+  buffer_ = storage;
+  size_ = size;
+  capacity_ = size;
+}
+
+// Builds a string made of `size` copies of `ch`: reserves once, then appends
+// the unit one at a time.
+String::String(size_type size, value_type ch) : String() {
+  reserve(size);
+  for (Index appended = 0; appended < size; ++appended) {
+    append(ch);
+  }
+}
+
+// Builds a string from a braced list of code units by delegating to the
+// (pointer, size) constructor.
+String::String(std::initializer_list<char16_t> l)
+ : String(l.begin(), l.size()) {}
+
+// Windows-only constructors from wide strings. The wchar_t data is
+// reinterpreted as char16_t without conversion.
+// NOTE(review): assumes wchar_t and char16_t have the same size and encoding
+// on this platform - confirm.
+#ifdef CRU_PLATFORM_WINDOWS
+String::String(const wchar_t* str) : String(str, GetStrSize(str)) {}
+String::String(const wchar_t* str, Index size)
+ : String(reinterpret_cast<const char16_t*>(str), size) {}
+#endif
+
+// Copy constructor. An empty source leaves the members at whatever their
+// declarations initialise them to; otherwise the contents are duplicated
+// into a new buffer sized exactly for them plus the terminating zero.
+String::String(const String& other) {
+  const Index length = other.size_;
+  if (length != 0) {
+    pointer storage = new value_type[length + 1];
+    std::memcpy(storage, other.buffer_, length * sizeof(value_type));
+    storage[length] = 0;
+
+    buffer_ = storage;
+    size_ = length;
+    capacity_ = length;
+  }
+}
+
+// Move constructor: takes over the source's storage and resets the source to
+// the shared empty buffer with zero size and capacity.
+String::String(String&& other) noexcept {
+  buffer_ = other.buffer_;
+  other.buffer_ = kEmptyBuffer;
+
+  size_ = other.size_;
+  other.size_ = 0;
+
+  capacity_ = other.capacity_;
+  other.capacity_ = 0;
+}
+
+// Copy assignment. Self-assignment is a no-op.
+//
+// The replacement buffer is allocated and filled before the current one is
+// released, so if the allocation throws, *this still owns its old, valid
+// buffer. Releasing first would leave buffer_ dangling and be deleted again
+// by the destructor.
+String& String::operator=(const String& other) {
+  if (this == &other) return *this;
+
+  pointer new_buffer = kEmptyBuffer;
+  Index new_size = 0;
+  if (other.buffer_ != kEmptyBuffer) {
+    new_size = other.size_;
+    new_buffer = new value_type[new_size + 1];
+    std::memcpy(new_buffer, other.buffer_, new_size * sizeof(value_type));
+    new_buffer[new_size] = 0;
+  }
+
+  // Nothing below can throw.
+  if (buffer_ != kEmptyBuffer) {
+    delete[] buffer_;
+  }
+  buffer_ = new_buffer;
+  size_ = new_size;
+  capacity_ = new_size;
+  return *this;
+}
+
+// Move assignment: frees the current heap buffer (if any), takes over the
+// source's storage and resets the source to the shared empty buffer.
+// Self-assignment is a no-op.
+String& String::operator=(String&& other) noexcept {
+  if (this == &other) return *this;
+
+  if (buffer_ != kEmptyBuffer) {
+    delete[] buffer_;
+  }
+
+  buffer_ = other.buffer_;
+  size_ = other.size_;
+  capacity_ = other.capacity_;
+
+  other.buffer_ = kEmptyBuffer;
+  other.size_ = 0;
+  other.capacity_ = 0;
+  return *this;
+}
+
+// Frees the buffer unless it is the shared kEmptyBuffer.
+String::~String() {
+  if (buffer_ == kEmptyBuffer) return;
+  delete[] buffer_;
+}
+
+// Stores the given buffer pointer, size and capacity as-is; nothing is
+// copied or allocated here.
+// NOTE(review): the destructor will delete[] `buffer` unless it is
+// kEmptyBuffer, so callers presumably hand over ownership - confirm.
+String::String(from_buffer_tag, pointer buffer, Index size, Index capacity)
+ : buffer_(buffer), size_(size), capacity_(capacity) {}
+
+// Empties the string by resizing to zero; capacity is not released.
+void String::clear() { resize(0); }
+
+// Changes the length to `new_size` (must be non-negative). Growing reserves
+// space and zero-fills the new code units; shrinking only moves the
+// terminating zero. An unchanged size does nothing.
+void String::resize(Index new_size) {
+  Expects(new_size >= 0);
+
+  if (new_size > size_) {
+    reserve(new_size);
+    std::memset(buffer_ + size_, 0, sizeof(value_type) * (new_size - size_));
+    buffer_[new_size] = 0;
+    size_ = new_size;
+  } else if (new_size < size_) {
+    size_ = new_size;
+    buffer_[new_size] = 0;
+  }
+}
+
+// Reduces capacity to match the current size.
+//
+// An empty string goes back to the shared kEmptyBuffer. Otherwise the
+// contents are copied into an exactly sized buffer, and the terminating zero
+// is written explicitly because only size_ code units are copied.
+void String::shrink_to_fit() {
+  if (capacity_ == size_) return;
+
+  if (size_ == 0) {
+    // Never hand the shared empty buffer to delete[].
+    if (buffer_ != kEmptyBuffer) {
+      delete[] buffer_;
+    }
+    buffer_ = kEmptyBuffer;
+    capacity_ = 0;
+  } else {
+    auto new_buffer = new value_type[size_ + 1];
+    std::memcpy(new_buffer, buffer_, sizeof(value_type) * size_);
+    new_buffer[size_] = 0;
+    delete[] buffer_;
+    buffer_ = new_buffer;
+    capacity_ = size_;
+  }
+}
+
+// Ensures room for at least `new_capacity` code units plus the terminating
+// zero. Never shrinks. Existing contents are copied into the new buffer and
+// the old heap buffer, if any, is freed.
+void String::reserve(Index new_capacity) {
+  Expects(new_capacity >= 0);
+  if (new_capacity <= capacity_ || new_capacity <= 0) return;
+
+  pointer grown = new value_type[new_capacity + 1];
+  if (buffer_ != kEmptyBuffer) {
+    std::memcpy(grown, buffer_, size_ * sizeof(value_type));
+    delete[] buffer_;
+  }
+  grown[size_] = 0;
+
+  buffer_ = grown;
+  capacity_ = new_capacity;
+}
+
+// Inserts `size` code units read from `str` before `pos`.
+//
+// Preconditions: `pos` lies in [cbegin(), cend()] and `size` is non-negative
+// (a negative size would turn into out-of-range memmove/memcpy lengths).
+// The source may point into this string's own storage.
+//
+// NOTE(review): the returned iterator is begin() + new size, i.e. the end of
+// the string after insertion, not the insertion point. This is kept as-is;
+// confirm that callers expect it.
+String::iterator String::insert(const_iterator pos, const_iterator str,
+                                Index size) {
+  Expects(pos >= cbegin() && pos <= cend());
+  Expects(size >= 0);
+
+  // If the source aliases our own buffer, copy it out first: the buffer may
+  // be reallocated or shifted below.
+  std::vector<value_type> backup_buffer;
+  if (str >= buffer_ && str < buffer_ + size_) {
+    backup_buffer.resize(size);
+    std::copy(str, str + size, backup_buffer.begin());
+    str = backup_buffer.data();
+  }
+
+  // Keep the position as an offset because reserve() may replace buffer_.
+  const Index index = pos - cbegin();
+
+  const Index new_size = size_ + size;
+  if (new_size > capacity_) {
+    // Exact fit when starting from zero capacity, otherwise keep doubling.
+    auto new_capacity = capacity_;
+    if (new_capacity == 0) {
+      new_capacity = new_size;
+    } else {
+      while (new_capacity < new_size) {
+        new_capacity *= 2;
+      }
+    }
+
+    reserve(new_capacity);
+  }
+
+  // Open a gap of `size` units at `index`, then fill it.
+  std::memmove(begin() + index + size, begin() + index,
+               (size_ - index) * sizeof(value_type));
+  std::memcpy(begin() + index, str, size * sizeof(value_type));
+
+  buffer_[new_size] = 0;
+  size_ = new_size;
+
+  return begin() + new_size;
+}
+
+// Removes the code units in [start, end), which must lie inside this string.
+// The tail is shifted down and the terminating zero rewritten. Returns an
+// iterator to where the removed range began.
+String::iterator String::erase(const_iterator start, const_iterator end) {
+  Expects(buffer_ <= start && start <= end && end <= buffer_ + size_);
+
+  const Index removed = end - start;
+  const Index tail = cend() - end;
+
+  auto destination = const_cast<iterator>(start);
+  auto source = const_cast<iterator>(end);
+  std::memmove(destination, source, tail * sizeof(value_type));
+
+  size_ -= removed;
+  buffer_[size_] = 0;
+
+  return destination;
+}
+
+// Appends `other` to this string and returns *this.
+String& String::operator+=(StringView other) {
+ append(other);
+ return *this;
+}
+
+// Returns a StringView of this string, produced by converting *this.
+StringView String::View() const { return *this; }
+
+// Forwards to StringView::Find on a view of this string.
+Index String::Find(value_type value, Index start) const {
+  const StringView view = View();
+  return view.Find(value, start);
+}
+
+// Forwards to StringView::Split on a view of this string.
+std::vector<String> String::Split(value_type separator,
+                                  bool remove_space_line) const {
+  const StringView view = View();
+  return view.Split(separator, remove_space_line);
+}
+
+// Forwards to StringView::SplitToLines on a view of this string.
+std::vector<String> String::SplitToLines(bool remove_space_line) const {
+  const StringView view = View();
+  return view.SplitToLines(remove_space_line);
+}
+
+// Forwards to StringView::StartWith on a view of this string.
+bool String::StartWith(StringView str) const {
+  const StringView view = View();
+  return view.StartWith(str);
+}
+
+// Forwards to StringView::EndWith on a view of this string.
+bool String::EndWith(StringView str) const {
+  const StringView view = View();
+  return view.EndWith(str);
+}
+
+// Forwards to StringView::ToUtf8 on a view of this string.
+std::string String::ToUtf8() const {
+  const StringView view = View();
+  return view.ToUtf8();
+}
+
+// Encodes this string as UTF-8 into a Buffer by forwarding to
+// StringView::ToUtf8Buffer. `end_zero` is passed through so the caller's
+// choice is honoured instead of being replaced by the view overload's
+// default.
+Buffer String::ToUtf8Buffer(bool end_zero) const {
+  return View().ToUtf8Buffer(end_zero);
+}
+
+// Removes leading code units for which IsWhitespace is true. A string made
+// only of whitespace is cleared. Returns *this.
+String& String::TrimStart() {
+  if (size_ == 0) return *this;
+
+  auto first_kept = begin();
+  const auto stop = end();
+  for (; first_kept != stop; ++first_kept) {
+    if (!IsWhitespace(*first_kept)) break;
+  }
+
+  if (first_kept != stop) {
+    erase(begin(), first_kept);
+  } else {
+    clear();
+  }
+
+  return *this;
+}
+
+// Removes trailing code units for which IsWhitespace is true and returns
+// *this.
+//
+// After shrinking, the terminating zero is rewritten at the new end. Every
+// other size-changing operation in this file (resize, erase, insert) keeps
+// the buffer zero-terminated, and without this the old whitespace would
+// still follow the logical end.
+String& String::TrimEnd() {
+  Index new_size = size_;
+  while (new_size > 0 && IsWhitespace(buffer_[new_size - 1])) {
+    --new_size;
+  }
+
+  // Only write when something was trimmed; size_ was then non-zero, so
+  // buffer_ is not the shared empty buffer.
+  if (new_size != size_) {
+    size_ = new_size;
+    buffer_[size_] = 0;
+  }
+
+  return *this;
+}
+
+// Trims the start and then the end; returns *this.
+String& String::Trim() {
+  return TrimStart().TrimEnd();
+}
+
+// Appends `code_point` encoded as UTF-16. Throws TextEncodeException when
+// Utf16EncodeCodePointAppend reports failure.
+void String::AppendCodePoint(CodePoint code_point) {
+  auto append_unit = [this](char16_t unit) { push_back(unit); };
+  if (Utf16EncodeCodePointAppend(code_point, append_unit)) {
+    return;
+  }
+  throw TextEncodeException(u"Code point out of range.");
+}
+
+// Forwards to StringView::IndexFromCodeUnitToCodePoint on a view of this
+// string.
+Index String::IndexFromCodeUnitToCodePoint(Index code_unit_index) const {
+  const StringView view = View();
+  return view.IndexFromCodeUnitToCodePoint(code_unit_index);
+}
+
+// Forwards to StringView::IndexFromCodePointToCodeUnit on a view of this
+// string.
+Index String::IndexFromCodePointToCodeUnit(Index code_point_index) const {
+  const StringView view = View();
+  return view.IndexFromCodePointToCodeUnit(code_point_index);
+}
+
+// Forwards to StringView::RangeFromCodeUnitToCodePoint on a view of this
+// string.
+Range String::RangeFromCodeUnitToCodePoint(Range code_unit_range) const {
+  const StringView view = View();
+  return view.RangeFromCodeUnitToCodePoint(code_unit_range);
+}
+
+// Forwards to StringView::RangeFromCodePointToCodeUnit on a view of this
+// string.
+Range String::RangeFromCodePointToCodeUnit(Range code_point_range) const {
+  const StringView view = View();
+  return view.RangeFromCodePointToCodeUnit(code_point_range);
+}
+
+// Forwards to StringView::ParseToInt on a view of this string.
+int String::ParseToInt(Index* processed_characters_count,
+                       StringToNumberFlag flags, int base) const {
+  const StringView view = View();
+  return view.ParseToInt(processed_characters_count, flags, base);
+}
+
+// Forwards to StringView::ParseToLongLong on a view of this string.
+long long String::ParseToLongLong(Index* processed_characters_count,
+                                  StringToNumberFlag flags, int base) const {
+  const StringView view = View();
+  return view.ParseToLongLong(processed_characters_count, flags, base);
+}
+
+// Forwards to StringView::ParseToFloat on a view of this string.
+float String::ParseToFloat(Index* processed_characters_count,
+                           StringToNumberFlag flags) const {
+  const StringView view = View();
+  return view.ParseToFloat(processed_characters_count, flags);
+}
+
+// Forwards to StringView::ParseToDouble on a view of this string.
+double String::ParseToDouble(Index* processed_characters_count,
+                             StringToNumberFlag flags) const {
+  const StringView view = View();
+  return view.ParseToDouble(processed_characters_count, flags);
+}
+
+// Forwards to StringView::ParseToFloatList on a view of this string.
+std::vector<float> String::ParseToFloatList(value_type separator) const {
+  const StringView view = View();
+  return view.ParseToFloatList(separator);
+}
+
+// Forwards to StringView::ParseToDoubleList on a view of this string.
+std::vector<double> String::ParseToDoubleList(value_type separator) const {
+  const StringView view = View();
+  return view.ParseToDoubleList(separator);
+}
+
+// Writes the string to a narrow stream as UTF-8.
+std::ostream& operator<<(std::ostream& os, const String& value) {
+  return os << value.ToUtf8();
+}
+
+namespace {
+// Three-way comparison of two code units by numeric value: -1, 0 or 1.
+inline int Compare(char16_t left, char16_t right) {
+  if (left == right) return 0;
+  return left < right ? -1 : 1;
+}
+
+// Same as Compare, applied after passing both code units through ToLower.
+inline int CaseInsensitiveCompare(char16_t left, char16_t right) {
+  const char16_t lowered_left = ToLower(left);
+  const char16_t lowered_right = ToLower(right);
+  return Compare(lowered_left, lowered_right);
+}
+}  // namespace
+
+// Forwards to StringView::Compare on a view of this string.
+int String::Compare(const String& other) const {
+  const StringView self = View();
+  return self.Compare(other);
+}
+// Forwards to StringView::CaseInsensitiveCompare on a view of this string.
+int String::CaseInsensitiveCompare(const String& other) const {
+  const StringView self = View();
+  return self.CaseInsensitiveCompare(other);
+}
+
+// Compares code unit by code unit. Returns the first non-zero per-unit
+// result; if one view is a prefix of the other, the shorter one orders first
+// (-1 when this is shorter, 1 when other is shorter, 0 when equal).
+int StringView::Compare(const StringView& other) const {
+  const_iterator lhs = cbegin();
+  const_iterator rhs = other.cbegin();
+  const const_iterator lhs_end = cend();
+  const const_iterator rhs_end = other.cend();
+
+  for (; lhs != lhs_end && rhs != rhs_end; ++lhs, ++rhs) {
+    const int order = cru::Compare(*lhs, *rhs);
+    if (order != 0) return order;
+  }
+
+  if (lhs != lhs_end) return 1;
+  return rhs != rhs_end ? -1 : 0;
+}
+
+// Like Compare, but each pair of code units is compared through
+// cru::CaseInsensitiveCompare. Length tie-breaking is the same: the shorter
+// view orders first.
+int StringView::CaseInsensitiveCompare(const StringView& other) const {
+  const_iterator lhs = cbegin();
+  const_iterator rhs = other.cbegin();
+  const const_iterator lhs_end = cend();
+  const const_iterator rhs_end = other.cend();
+
+  for (; lhs != lhs_end && rhs != rhs_end; ++lhs, ++rhs) {
+    const int order = cru::CaseInsensitiveCompare(*lhs, *rhs);
+    if (order != 0) return order;
+  }
+
+  if (lhs != lhs_end) return 1;
+  return rhs != rhs_end ? -1 : 0;
+}
+
+// Returns the view from `pos` to the end.
+//
+// `pos` may equal the size, which yields an empty view. This matches
+// std::basic_string_view::substr and Find's `start <= size_` check, and it
+// makes substr(0) valid on an empty view.
+StringView StringView::substr(Index pos) {
+  Expects(pos >= 0 && pos <= size_);
+  return StringView(ptr_ + pos, size_ - pos);
+}
+
+// Returns up to `size` code units starting at `pos`; the length is clamped
+// to what remains after `pos`.
+//
+// `pos` may equal the size, which yields an empty view. This matches
+// std::basic_string_view::substr and Find's `start <= size_` check.
+StringView StringView::substr(Index pos, Index size) {
+  Expects(pos >= 0 && pos <= size_);
+
+  return StringView(ptr_ + pos, std::min(size, size_ - pos));
+}
+
+// Returns the index of the first code unit equal to `value` at or after
+// `start`, or -1 when there is none. `start` must lie in [0, size].
+Index StringView::Find(value_type value, Index start) const {
+  Expects(start >= 0 && start <= size_);
+
+  const value_type* const last = ptr_ + size_;
+  for (const value_type* cursor = ptr_ + start; cursor < last; ++cursor) {
+    if (*cursor == value) return cursor - ptr_;
+  }
+  return -1;
+}
+
+// Splits the view at every `separator` code unit and returns the pieces as
+// Strings. An empty view yields no pieces. When `remove_space_line` is true,
+// pieces that are empty or contain only whitespace (per IsWhitespace) are
+// dropped; otherwise every piece, including empty ones, is kept.
+std::vector<String> StringView::Split(value_type separator,
+                                      bool remove_space_line) const {
+  std::vector<String> pieces;
+  if (size_ == 0) return pieces;
+
+  // Appends [first, last) unless blank pieces are being filtered out and
+  // this piece has no non-whitespace code unit.
+  const auto emit = [this, &pieces, remove_space_line](Index first,
+                                                       Index last) {
+    if (remove_space_line) {
+      bool has_content = false;
+      for (Index i = first; i < last && !has_content; ++i) {
+        has_content = !IsWhitespace(ptr_[i]);
+      }
+      if (!has_content) return;
+    }
+    pieces.emplace_back(begin() + first, begin() + last);
+  };
+
+  Index piece_start = 0;
+  for (Index i = 0; i < size_; ++i) {
+    if (ptr_[i] == separator) {
+      emit(piece_start, i);
+      piece_start = i + 1;
+    }
+  }
+
+  // Whatever follows the last separator (possibly nothing).
+  emit(piece_start, size_);
+
+  return pieces;
+}
+
+// Splits at line feed (u'\n') code units; see Split for `remove_space_line`.
+std::vector<String> StringView::SplitToLines(bool remove_space_line) const {
+  const value_type line_feed = u'\n';
+  return Split(line_feed, remove_space_line);
+}
+
+// Returns true when this view begins with `str`.
+//
+// std::memcmp takes a byte count, so the code-unit count is scaled by
+// sizeof(value_type). Passing the bare count compares only part of the
+// prefix when code units are wider than one byte.
+bool StringView::StartWith(StringView str) const {
+  if (str.size() > size_) return false;
+  return std::memcmp(str.data(), ptr_, str.size() * sizeof(value_type)) == 0;
+}
+
+// Returns true when this view ends with `str`.
+//
+// std::memcmp takes a byte count, so the code-unit count is scaled by
+// sizeof(value_type). Passing the bare count compares only part of the
+// suffix when code units are wider than one byte.
+bool StringView::EndWith(StringView str) const {
+  if (str.size() > size_) return false;
+  return std::memcmp(str.data(), ptr_ + size_ - str.size(),
+                     str.size() * sizeof(value_type)) == 0;
+}
+
+// Counts how many code points are stepped over before the code point
+// iterator's position reaches `code_unit_index` or the iterator is past the
+// end.
+Index StringView::IndexFromCodeUnitToCodePoint(Index code_unit_index) const {
+  Index code_point_count = 0;
+  for (auto iter = CodePointIterator();
+       iter.GetPosition() < code_unit_index && !iter.IsPastEnd(); ++iter) {
+    ++code_point_count;
+  }
+  return code_point_count;
+}
+
+// Advances the code point iterator `code_point_index` times (stopping early
+// if it goes past the end) and returns its resulting position.
+Index StringView::IndexFromCodePointToCodeUnit(Index code_point_index) const {
+  auto iter = CodePointIterator();
+  for (Index skipped = 0; skipped < code_point_index && !iter.IsPastEnd();
+       ++skipped) {
+    ++iter;
+  }
+  return iter.GetPosition();
+}
+
+// Converts both ends of a code-unit range with IndexFromCodeUnitToCodePoint
+// and builds the result with Range::FromTwoSides.
+Range StringView::RangeFromCodeUnitToCodePoint(Range code_unit_range) const {
+  const Index first = IndexFromCodeUnitToCodePoint(code_unit_range.GetStart());
+  const Index last = IndexFromCodeUnitToCodePoint(code_unit_range.GetEnd());
+  return Range::FromTwoSides(first, last);
+}
+
+// Converts both ends of a code-point range with IndexFromCodePointToCodeUnit
+// and builds the result with Range::FromTwoSides.
+Range StringView::RangeFromCodePointToCodeUnit(Range code_point_range) const {
+  const Index first = IndexFromCodePointToCodeUnit(code_point_range.GetStart());
+  const Index last = IndexFromCodePointToCodeUnit(code_point_range.GetEnd());
+  return Range::FromTwoSides(first, last);
+}
+
+// Re-encodes every code point of the view as UTF-8 into a std::string.
+std::string StringView::ToUtf8() const {
+  std::string utf8;
+  auto append_byte = [&utf8](char byte) { utf8.push_back(byte); };
+  for (auto code_point : CodePointIterator()) {
+    Utf8EncodeCodePointAppend(code_point, append_byte);
+  }
+  return utf8;
+}
+
+// Re-encodes every code point of the view as UTF-8 into a Buffer, growing
+// the buffer in fixed steps when its used region reaches the end. When
+// `end_zero` is true, a zero byte is appended after the encoded data.
+Buffer StringView::ToUtf8Buffer(bool end_zero) const {
+  const Index grow_step = 10;
+  Buffer output(grow_step);  // Maybe another init value is more reasonable.
+
+  auto append_byte = [&output](char c) {
+    if (output.IsUsedReachEnd()) {
+      output.ResizeBuffer(output.GetBufferSize() + grow_step, true);
+    }
+    output.PushBack(static_cast<std::byte>(c));
+  };
+
+  for (auto code_point : CodePointIterator()) {
+    Utf8EncodeCodePointAppend(code_point, append_byte);
+  }
+
+  if (end_zero) {
+    append_byte(0);
+  }
+  return output;
+}
+
+// Parses the view as an int via ParseToInteger<int>.
+int StringView::ParseToInt(Index* processed_characters_count,
+                           StringToNumberFlag flags, int base) const {
+  const int parsed =
+      ParseToInteger<int>(processed_characters_count, flags, base);
+  return parsed;
+}
+
+// Parses the view as a long long via ParseToInteger<long long>.
+long long StringView::ParseToLongLong(Index* processed_characters_count,
+                                      StringToNumberFlag flags,
+                                      int base) const {
+  const long long parsed =
+      ParseToInteger<long long>(processed_characters_count, flags, base);
+  return parsed;
+}
+
+// Translates StringToNumberFlag bits into double-conversion's
+// StringToDoubleConverter flag bits. ALLOW_CASE_INSENSIBILITY is always set;
+// the leading-spaces, trailing-spaces and trailing-junk bits are set only
+// when the matching StringToNumberFlags value is present.
+static int MapStringToDoubleFlags(StringToNumberFlag flags) {
+  using Converter = double_conversion::StringToDoubleConverter;
+
+  int mapped = Converter::ALLOW_CASE_INSENSIBILITY;
+  if (flags & StringToNumberFlags::kAllowLeadingSpaces) {
+    mapped |= Converter::ALLOW_LEADING_SPACES;
+  }
+  if (flags & StringToNumberFlags::kAllowTrailingSpaces) {
+    mapped |= Converter::ALLOW_TRAILING_SPACES;
+  }
+  if (flags & StringToNumberFlags::kAllowTrailingJunk) {
+    mapped |= Converter::ALLOW_TRAILING_JUNK;
+  }
+  return mapped;
+}
+
+// Builds a StringToDoubleConverter from the mapped flags, with 0.0 and NAN as
+// the second and third constructor arguments and "inf" / "nan" as the symbol
+// strings.
+// NOTE(review): in double-conversion these are presumably the empty-string
+// value and the junk-string value; the parse functions in this file treat a
+// NaN result as the error signal - confirm against the library docs.
+static double_conversion::StringToDoubleConverter CreateStringToDoubleConverter(
+ StringToNumberFlag flags) {
+ return {MapStringToDoubleFlags(flags), 0.0, NAN, "inf", "nan"};
+}
+
+// Parses the view as a float with double-conversion. When
+// `processed_characters_count` is non-null it receives the number of
+// characters the converter reports as processed. Throws Exception when
+// kThrowOnError is set and the result is NaN; otherwise returns the result
+// as-is.
+float StringView::ParseToFloat(Index* processed_characters_count,
+                               StringToNumberFlag flags) const {
+  int consumed;
+  const float value = CreateStringToDoubleConverter(flags).StringToFloat(
+      reinterpret_cast<const uc16*>(ptr_), static_cast<int>(size_), &consumed);
+
+  if (processed_characters_count != nullptr) {
+    *processed_characters_count = consumed;
+  }
+
+  if ((flags & StringToNumberFlags::kThrowOnError) && std::isnan(value)) {
+    throw Exception(u"Result of string to float conversion is NaN");
+  }
+
+  return value;
+}
+
+// Parses the view as a double with double-conversion. When
+// `processed_characters_count` is non-null it receives the number of
+// characters the converter reports as processed. Throws Exception when
+// kThrowOnError is set and the result is NaN; otherwise returns the result
+// as-is.
+double StringView::ParseToDouble(Index* processed_characters_count,
+                                 StringToNumberFlag flags) const {
+  int consumed;
+  const double value = CreateStringToDoubleConverter(flags).StringToDouble(
+      reinterpret_cast<const uc16*>(ptr_), static_cast<int>(size_), &consumed);
+
+  if (processed_characters_count != nullptr) {
+    *processed_characters_count = consumed;
+  }
+
+  if ((flags & StringToNumberFlags::kThrowOnError) && std::isnan(value)) {
+    throw Exception(u"Result of string to double conversion is NaN");
+  }
+
+  return value;
+}
+
+// Splits the view at `separator` (dropping blank pieces) and parses each
+// piece as a float. Throws Exception as soon as a piece parses to NaN.
+//
+// The error text names "float" because this is the float parser; the result
+// vector is reserved up front since the piece count is already known.
+std::vector<float> StringView::ParseToFloatList(value_type separator) const {
+  const auto list = Split(separator, true);
+
+  std::vector<float> result;
+  result.reserve(list.size());
+  for (const auto& item : list) {
+    const auto value = item.ParseToFloat();
+    if (std::isnan(value)) {
+      throw Exception(u"Invalid float value.");
+    }
+    result.push_back(value);
+  }
+  return result;
+}
+
+// Splits the view at `separator` (dropping blank pieces) and parses each
+// piece as a double. Throws Exception as soon as a piece parses to NaN.
+std::vector<double> StringView::ParseToDoubleList(value_type separator) const {
+  std::vector<double> values;
+  const auto pieces = Split(separator, true);
+  for (const auto& piece : pieces) {
+    const double parsed = piece.ParseToDouble();
+    if (std::isnan(parsed)) {
+      throw Exception(u"Invalid double value.");
+    }
+    values.push_back(parsed);
+  }
+  return values;
+}
+
+// Returns a copy of `s` with every code unit passed through the
+// per-code-unit ToLower.
+String ToLower(StringView s) {
+  String lowered;
+  for (const auto unit : s) {
+    lowered.push_back(ToLower(unit));
+  }
+  return lowered;
+}
+
+// Returns a copy of `s` with every code unit passed through the
+// per-code-unit ToUpper.
+String ToUpper(StringView s) {
+  String raised;
+  for (const auto unit : s) {
+    raised.push_back(ToUpper(unit));
+  }
+  return raised;
+}
+} // namespace cru