diff options
author | Chris Mihelich <cmihelic@google.com> | 2024-06-10 12:17:24 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-06-10 12:18:15 -0700 |
commit | d30298a1b6f3dd8939910561e211fe990e4e2e8e (patch) | |
tree | 25c12d7fd3ed9fbacf74fde29914651d290ffe74 /absl/debugging/internal/utf8_for_code_point.cc | |
parent | 96cdf6cc87e7a21d92f9f96a72263a93d3929ec7 (diff) | |
download | abseil-d30298a1b6f3dd8939910561e211fe990e4e2e8e.tar.gz abseil-d30298a1b6f3dd8939910561e211fe990e4e2e8e.tar.bz2 abseil-d30298a1b6f3dd8939910561e211fe990e4e2e8e.zip |
UTF-8 encoding library to support Rust Punycode decoding.
PiperOrigin-RevId: 641983507
Change-Id: Iad7933884aef6bfd90d159c049a1d698d19456c6
Diffstat (limited to 'absl/debugging/internal/utf8_for_code_point.cc')
-rw-r--r-- | absl/debugging/internal/utf8_for_code_point.cc | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/absl/debugging/internal/utf8_for_code_point.cc b/absl/debugging/internal/utf8_for_code_point.cc new file mode 100644 index 00000000..658a3b51 --- /dev/null +++ b/absl/debugging/internal/utf8_for_code_point.cc @@ -0,0 +1,70 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/utf8_for_code_point.h" + +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +// UTF-8 encoding bounds. +constexpr uint32_t kMinSurrogate = 0xd800, kMaxSurrogate = 0xdfff; +constexpr uint32_t kMax1ByteCodePoint = 0x7f; +constexpr uint32_t kMax2ByteCodePoint = 0x7ff; +constexpr uint32_t kMax3ByteCodePoint = 0xffff; +constexpr uint32_t kMaxCodePoint = 0x10ffff; + +} // namespace + +Utf8ForCodePoint::Utf8ForCodePoint(uint64_t code_point) { + if (code_point <= kMax1ByteCodePoint) { + length = 1; + bytes[0] = static_cast<char>(code_point); + return; + } + + if (code_point <= kMax2ByteCodePoint) { + length = 2; + bytes[0] = static_cast<char>(0xc0 | (code_point >> 6)); + bytes[1] = static_cast<char>(0x80 | (code_point & 0x3f)); + return; + } + + if (kMinSurrogate <= code_point && code_point <= kMaxSurrogate) return; + + if (code_point <= kMax3ByteCodePoint) { + length = 3; + bytes[0] = static_cast<char>(0xe0 | (code_point >> 12)); + bytes[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f)); + bytes[2] = static_cast<char>(0x80 | (code_point & 0x3f)); + return; + } + + if (code_point > kMaxCodePoint) return; + + length = 4; + bytes[0] = static_cast<char>(0xf0 | (code_point >> 18)); + bytes[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f)); + bytes[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f)); + bytes[3] = static_cast<char>(0x80 | (code_point & 0x3f)); +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl |