diff options
author | Benjamin Barenblat <bbaren@google.com> | 2023-09-07 13:16:09 -0400 |
---|---|---|
committer | Benjamin Barenblat <bbaren@google.com> | 2023-09-07 13:16:09 -0400 |
commit | 6fdbff8bbce2a1debdc060df381f39e3dcfb65af (patch) | |
tree | 71f1ef38477a65d5cce472fc042c90087c2bb351 /absl/strings/ascii.cc | |
parent | 8d4a80fe37176b1170d7dce0772dea9584ec3e32 (diff) | |
parent | 29bf8085f3bf17b84d30e34b3d7ff8248fda404e (diff) | |
download | abseil-6fdbff8bbce2a1debdc060df381f39e3dcfb65af.tar.gz abseil-6fdbff8bbce2a1debdc060df381f39e3dcfb65af.tar.bz2 abseil-6fdbff8bbce2a1debdc060df381f39e3dcfb65af.zip |
Merge new upstream LTS 20230802.0
Diffstat (limited to 'absl/strings/ascii.cc')
-rw-r--r-- | absl/strings/ascii.cc | 60 |
1 files changed, 54 insertions, 6 deletions
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc index 868df2d1..16c96899 100644 --- a/absl/strings/ascii.cc +++ b/absl/strings/ascii.cc @@ -14,6 +14,10 @@ #include "absl/strings/ascii.h" +#include <climits> +#include <cstring> +#include <string> + namespace absl { ABSL_NAMESPACE_BEGIN namespace ascii_internal { @@ -153,18 +157,62 @@ ABSL_DLL const char kToUpper[256] = { }; // clang-format on +template <bool ToUpper> +constexpr void AsciiStrCaseFold(char* p, char* end) { + // The upper- and lowercase versions of ASCII characters differ by only 1 bit. + // When we need to flip the case, we can xor with this bit to achieve the + // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We + // could have chosen 'z' and 'Z', or any other pair of characters as they all + // have the same single bit difference. + constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A'; + + constexpr char ch_a = ToUpper ? 'a' : 'A'; + constexpr char ch_z = ToUpper ? 'z' : 'Z'; + for (; p < end; ++p) { + unsigned char v = static_cast<unsigned char>(*p); + // We use & instead of && to ensure this always stays branchless + // We use static_cast<int> to suppress -Wbitwise-instead-of-logical + bool is_in_range = static_cast<bool>(static_cast<int>(ch_a <= v) & + static_cast<int>(v <= ch_z)); + v ^= is_in_range ? kAsciiCaseBitFlip : 0; + *p = static_cast<char>(v); + } +} + +static constexpr size_t ValidateAsciiCasefold() { + constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN; + size_t incorrect_index = 0; + char lowered[num_chars] = {}; + char uppered[num_chars] = {}; + for (unsigned int i = 0; i < num_chars; ++i) { + uppered[i] = lowered[i] = static_cast<char>(i); + } + AsciiStrCaseFold<false>(&lowered[0], &lowered[num_chars]); + AsciiStrCaseFold<true>(&uppered[0], &uppered[num_chars]); + for (size_t i = 0; i < num_chars; ++i) { + const char ch = static_cast<char>(i), + ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch), + ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch); + if (uppered[i] != ch_upper || lowered[i] != ch_lower) { + incorrect_index = i > 0 ? i : num_chars; + break; + } + } + return incorrect_index; +} + +static_assert(ValidateAsciiCasefold() == 0, "error in case conversion"); + } // namespace ascii_internal void AsciiStrToLower(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_tolower(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size()); } void AsciiStrToUpper(std::string* s) { - for (auto& ch : *s) { - ch = absl::ascii_toupper(static_cast<unsigned char>(ch)); - } + char* p = &(*s)[0]; // Guaranteed to be valid for empty strings + return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size()); } void RemoveExtraAsciiWhitespace(std::string* str) { |