// Reconstructed post-patch code from Abseil commit
// ac8afe6c78d4bde66c8bcf2f3dd9fefea7d4fac8
//   From:    Abseil Team
//   Date:    Mon, 13 Mar 2023 11:44:25 -0700
//   Subject: Performance improvement for absl::AsciiStrToUpper() and
//            absl::AsciiStrToLower()
//   PiperOrigin-RevId: 516275043
//   Change-Id: I906ef0d96dddf12e3738490bd26cb05753ec008c
//   File:    absl/strings/ascii.cc (54 insertions, 6 deletions)
//
// The scraped patch had lost every angle-bracketed token (header names,
// template parameter lists, cast target types). They are restored below.
//
// NOTE(review): the three header names were not recoverable from the scrape;
// <climits>, <cstring> and <string> are inferred from what the code uses --
// confirm against the upstream commit.
//
// NOTE(review): in the Abseil tree this code follows
// `#include "absl/strings/ascii.h"` and sits between ABSL_NAMESPACE_BEGIN and
// ABSL_NAMESPACE_END, after the kToUpper[256] table and before
// RemoveExtraAsciiWhitespace(). Those pieces are only partially visible in the
// patch context and are therefore not reproduced here.

#include <climits>
#include <cstring>
#include <string>

namespace absl {
namespace ascii_internal {

// Converts the ASCII letters in the half-open range [p, end) in place.
//
// Template parameter:
//   ToUpper - when true, 'a'..'z' become 'A'..'Z'; when false, 'A'..'Z'
//             become 'a'..'z'. All other byte values are left unchanged.
//
// The loop body contains no data-dependent branch.
template <bool ToUpper>
constexpr void AsciiStrCaseFold(char* p, char* end) {
  // The upper- and lowercase versions of ASCII characters differ by only 1 bit.
  // When we need to flip the case, we can xor with this bit to achieve the
  // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
  // could have chosen 'z' and 'Z', or any other pair of characters as they all
  // have the same single bit difference.
  constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';

  // Bounds of the letter range whose case has to be flipped.
  constexpr char ch_a = ToUpper ? 'a' : 'A';
  constexpr char ch_z = ToUpper ? 'z' : 'Z';
  for (; p < end; ++p) {
    unsigned char v = static_cast<unsigned char>(*p);
    // We use & instead of && to ensure this always stays branchless.
    // We use static_cast to suppress -Wbitwise-instead-of-logical.
    bool is_in_range = static_cast<bool>(static_cast<unsigned char>(ch_a <= v) &
                                         static_cast<unsigned char>(v <= ch_z));
    v ^= is_in_range ? kAsciiCaseBitFlip : 0;
    *p = static_cast<char>(v);
  }
}

// Compile-time self-check of AsciiStrCaseFold over every possible char value.
//
// Returns 0 when both conversion directions agree with a straightforward
// reference computation for all values. Otherwise returns the index of the
// first mismatch, with index 0 reported as num_chars so that a failure is
// never mistaken for success.
static constexpr size_t ValidateAsciiCasefold() {
  constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN;
  size_t incorrect_index = 0;
  char lowered[num_chars] = {};
  char uppered[num_chars] = {};
  for (unsigned int i = 0; i < num_chars; ++i) {
    uppered[i] = lowered[i] = static_cast<char>(i);
  }
  // Pointer arithmetic yields the one-past-the-end pointer without
  // subscripting an element that does not exist.
  AsciiStrCaseFold<false>(lowered, lowered + num_chars);
  AsciiStrCaseFold<true>(uppered, uppered + num_chars);
  for (size_t i = 0; i < num_chars; ++i) {
    const char ch = static_cast<char>(i);
    const char ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch);
    const char ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch);
    if (uppered[i] != ch_upper || lowered[i] != ch_lower) {
      incorrect_index = i > 0 ? i : num_chars;
      break;
    }
  }
  return incorrect_index;
}

static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");

}  // namespace ascii_internal

// Lowercases the ASCII letters of `*s` in place; other bytes are untouched.
// Replaces the earlier per-character loop that called absl::ascii_tolower().
void AsciiStrToLower(std::string* s) {
  char* p = &(*s)[0];  // Guaranteed to be valid for empty strings
  return ascii_internal::AsciiStrCaseFold<false>(p, p + s->size());
}

// Uppercases the ASCII letters of `*s` in place; other bytes are untouched.
// Replaces the earlier per-character loop that called absl::ascii_toupper().
void AsciiStrToUpper(std::string* s) {
  char* p = &(*s)[0];  // Guaranteed to be valid for empty strings
  return ascii_internal::AsciiStrCaseFold<true>(p, p + s->size());
}

}  // namespace absl