diff options
Diffstat (limited to 'absl/strings/escaping.cc')
-rw-r--r-- | absl/strings/escaping.cc | 128 |
1 files changed, 58 insertions, 70 deletions
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 7d97944e..4dc69702 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -42,11 +42,11 @@ constexpr bool kUnescapeNulls = false; inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); } -inline unsigned int hex_digit_to_int(char c) { +inline int hex_digit_to_int(char c) { static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, "Character set must be ASCII."); - assert(absl::ascii_isxdigit(static_cast<unsigned char>(c))); - unsigned int x = static_cast<unsigned char>(c); + assert(absl::ascii_isxdigit(c)); + int x = static_cast<unsigned char>(c); if (x > '9') { x += 9; } @@ -121,29 +121,27 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, case '7': { // octal digit: 1 to 3 digits const char* octal_start = p; - unsigned int ch = static_cast<unsigned int>(*p - '0'); // digit 1 + unsigned int ch = *p - '0'; + if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0'; if (p < last_byte && is_octal_digit(p[1])) - ch = ch * 8 + static_cast<unsigned int>(*++p - '0'); // digit 2 - if (p < last_byte && is_octal_digit(p[1])) - ch = ch * 8 + static_cast<unsigned int>(*++p - '0'); // digit 3 + ch = ch * 8 + *++p - '0'; // now points at last digit if (ch > 0xff) { if (error) { *error = "Value of \\" + - std::string(octal_start, - static_cast<size_t>(p + 1 - octal_start)) + + std::string(octal_start, p + 1 - octal_start) + " exceeds 0xff"; } return false; } if ((ch == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character - const size_t octal_size = static_cast<size_t>(p + 1 - octal_start); + const ptrdiff_t octal_size = p + 1 - octal_start; *d++ = '\\'; memmove(d, octal_start, octal_size); d += octal_size; break; } - *d++ = static_cast<char>(ch); + *d++ = ch; break; } case 'x': @@ -151,34 +149,32 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, if (p >= last_byte) { if (error) *error = "String cannot end with \\x"; return false; - } else if (!absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) { + } else if (!absl::ascii_isxdigit(p[1])) { if (error) *error = "\\x cannot be followed by a non-hex digit"; return false; } unsigned int ch = 0; const char* hex_start = p; - while (p < last_byte && - absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) + while (p < last_byte && absl::ascii_isxdigit(p[1])) // Arbitrarily many hex digits ch = (ch << 4) + hex_digit_to_int(*++p); if (ch > 0xFF) { if (error) { *error = "Value of \\" + - std::string(hex_start, - static_cast<size_t>(p + 1 - hex_start)) + + std::string(hex_start, p + 1 - hex_start) + " exceeds 0xff"; } return false; } if ((ch == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character - const size_t hex_size = static_cast<size_t>(p + 1 - hex_start); + const ptrdiff_t hex_size = p + 1 - hex_start; *d++ = '\\'; memmove(d, hex_start, hex_size); d += hex_size; break; } - *d++ = static_cast<char>(ch); + *d++ = ch; break; } case 'u': { @@ -188,20 +184,18 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, if (p + 4 >= end) { if (error) { *error = "\\u must be followed by 4 hex digits: \\" + - std::string(hex_start, - static_cast<size_t>(p + 1 - hex_start)); + std::string(hex_start, p + 1 - hex_start); } return false; } for (int i = 0; i < 4; ++i) { // Look one char ahead. - if (absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) { + if (absl::ascii_isxdigit(p[1])) { rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. } else { if (error) { *error = "\\u must be followed by 4 hex digits: \\" + - std::string(hex_start, - static_cast<size_t>(p + 1 - hex_start)); + std::string(hex_start, p + 1 - hex_start); } return false; } @@ -226,22 +220,20 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, if (p + 8 >= end) { if (error) { *error = "\\U must be followed by 8 hex digits: \\" + - std::string(hex_start, - static_cast<size_t>(p + 1 - hex_start)); + std::string(hex_start, p + 1 - hex_start); } return false; } for (int i = 0; i < 8; ++i) { // Look one char ahead. - if (absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) { + if (absl::ascii_isxdigit(p[1])) { // Don't change rune until we're sure this // is within the Unicode limit, but do advance p. uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p); if (newrune > 0x10FFFF) { if (error) { *error = "Value of \\" + - std::string(hex_start, - static_cast<size_t>(p + 1 - hex_start)) + + std::string(hex_start, p + 1 - hex_start) + " exceeds Unicode limit (0x10FFFF)"; } return false; @@ -251,8 +243,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, } else { if (error) { *error = "\\U must be followed by 8 hex digits: \\" + - std::string(hex_start, - static_cast<size_t>(p + 1 - hex_start)); + std::string(hex_start, p + 1 - hex_start); } return false; } @@ -300,7 +291,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, error)) { return false; } - dest->erase(static_cast<size_t>(dest_size)); + dest->erase(dest_size); return true; } @@ -320,7 +311,7 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex, std::string dest; bool last_hex_escape = false; // true if last output char was \xNN. - for (char c : src) { + for (unsigned char c : src) { bool is_hex_escape = false; switch (c) { case '\n': dest.append("\\" "n"); break; @@ -329,30 +320,28 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex, case '\"': dest.append("\\" "\""); break; case '\'': dest.append("\\" "'"); break; case '\\': dest.append("\\" "\\"); break; - default: { + default: // Note that if we emit \xNN and the src character after that is a hex // digit then that digit must be escaped too to prevent it being // interpreted as part of the character code by C. - const unsigned char uc = static_cast<unsigned char>(c); - if ((!utf8_safe || uc < 0x80) && - (!absl::ascii_isprint(uc) || - (last_hex_escape && absl::ascii_isxdigit(uc)))) { + if ((!utf8_safe || c < 0x80) && + (!absl::ascii_isprint(c) || + (last_hex_escape && absl::ascii_isxdigit(c)))) { if (use_hex) { dest.append("\\" "x"); - dest.push_back(numbers_internal::kHexChar[uc / 16]); - dest.push_back(numbers_internal::kHexChar[uc % 16]); + dest.push_back(numbers_internal::kHexChar[c / 16]); + dest.push_back(numbers_internal::kHexChar[c % 16]); is_hex_escape = true; } else { dest.append("\\"); - dest.push_back(numbers_internal::kHexChar[uc / 64]); - dest.push_back(numbers_internal::kHexChar[(uc % 64) / 8]); - dest.push_back(numbers_internal::kHexChar[uc % 8]); + dest.push_back(numbers_internal::kHexChar[c / 64]); + dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]); + dest.push_back(numbers_internal::kHexChar[c % 8]); } } else { dest.push_back(c); break; } - } } last_hex_escape = is_hex_escape; } @@ -361,7 +350,7 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex, } /* clang-format off */ -constexpr unsigned char c_escaped_len[256] = { +constexpr char c_escaped_len[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", ' @@ -386,8 +375,7 @@ constexpr unsigned char c_escaped_len[256] = { // that UTF-8 bytes are not handled specially. inline size_t CEscapedLength(absl::string_view src) { size_t escaped_len = 0; - for (char c : src) - escaped_len += c_escaped_len[static_cast<unsigned char>(c)]; + for (unsigned char c : src) escaped_len += c_escaped_len[c]; return escaped_len; } @@ -403,8 +391,8 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { cur_dest_len + escaped_len); char* append_ptr = &(*dest)[cur_dest_len]; - for (char c : src) { - size_t char_len = c_escaped_len[static_cast<unsigned char>(c)]; + for (unsigned char c : src) { + int char_len = c_escaped_len[c]; if (char_len == 1) { *append_ptr++ = c; } else if (char_len == 2) { @@ -436,9 +424,9 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { } } else { *append_ptr++ = '\\'; - *append_ptr++ = '0' + static_cast<unsigned char>(c) / 64; - *append_ptr++ = '0' + (static_cast<unsigned char>(c) % 64) / 8; - *append_ptr++ = '0' + static_cast<unsigned char>(c) % 8; + *append_ptr++ = '0' + c / 64; + *append_ptr++ = '0' + (c % 64) / 8; + *append_ptr++ = '0' + c % 8; } } } @@ -452,7 +440,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, size_t destidx = 0; int decode = 0; int state = 0; - unsigned char ch = 0; + unsigned int ch = 0; unsigned int temp = 0; // If "char" is signed by default, using *src as an array index results in @@ -512,13 +500,13 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, // how to handle those cases. GET_INPUT(first, 4); - temp = static_cast<unsigned char>(decode); + temp = decode; GET_INPUT(second, 3); - temp = (temp << 6) | static_cast<unsigned char>(decode); + temp = (temp << 6) | decode; GET_INPUT(third, 2); - temp = (temp << 6) | static_cast<unsigned char>(decode); + temp = (temp << 6) | decode; GET_INPUT(fourth, 1); - temp = (temp << 6) | static_cast<unsigned char>(decode); + temp = (temp << 6) | decode; } else { // We really did have four good data bytes, so advance four // characters in the string. @@ -530,11 +518,11 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, // temp has 24 bits of input, so write that out as three bytes. if (destidx + 3 > szdest) return false; - dest[destidx + 2] = static_cast<char>(temp); + dest[destidx + 2] = temp; temp >>= 8; - dest[destidx + 1] = static_cast<char>(temp); + dest[destidx + 1] = temp; temp >>= 8; - dest[destidx] = static_cast<char>(temp); + dest[destidx] = temp; destidx += 3; } } else { @@ -595,18 +583,18 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, } // Each input character gives us six bits of output. - temp = (temp << 6) | static_cast<unsigned char>(decode); + temp = (temp << 6) | decode; ++state; if (state == 4) { // If we've accumulated 24 bits of output, write that out as // three bytes. if (dest) { if (destidx + 3 > szdest) return false; - dest[destidx + 2] = static_cast<char>(temp); + dest[destidx + 2] = temp; temp >>= 8; - dest[destidx + 1] = static_cast<char>(temp); + dest[destidx + 1] = temp; temp >>= 8; - dest[destidx] = static_cast<char>(temp); + dest[destidx] = temp; } destidx += 3; state = 0; @@ -631,7 +619,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, if (dest) { if (destidx + 1 > szdest) return false; temp >>= 4; - dest[destidx] = static_cast<char>(temp); + dest[destidx] = temp; } ++destidx; expected_equals = 2; @@ -642,9 +630,9 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, if (dest) { if (destidx + 2 > szdest) return false; temp >>= 2; - dest[destidx + 1] = static_cast<char>(temp); + dest[destidx + 1] = temp; temp >>= 8; - dest[destidx] = static_cast<char>(temp); + dest[destidx] = temp; } destidx += 2; expected_equals = 1; @@ -834,9 +822,9 @@ constexpr char kHexValueLenient[256] = { // or a string. This works because we use the [] operator to access // individual characters at a time. template <typename T> -void HexStringToBytesInternal(const char* from, T to, size_t num) { - for (size_t i = 0; i < num; i++) { - to[i] = static_cast<char>(kHexValueLenient[from[i * 2] & 0xFF] << 4) + +void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { + for (int i = 0; i < num; i++) { + to[i] = (kHexValueLenient[from[i * 2] & 0xFF] << 4) + (kHexValueLenient[from[i * 2 + 1] & 0xFF]); } } @@ -844,7 +832,7 @@ void HexStringToBytesInternal(const char* from, T to, size_t num) { // This is a templated function so that T can be either a char* or a // std::string. template <typename T> -void BytesToHexStringInternal(const unsigned char* src, T dest, size_t num) { +void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { auto dest_ptr = &dest[0]; for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; |