diff options
Diffstat (limited to 'absl/debugging/internal')
-rw-r--r-- | absl/debugging/internal/bounded_utf8_length_sequence.h | 126 | ||||
-rw-r--r-- | absl/debugging/internal/bounded_utf8_length_sequence_test.cc | 126 | ||||
-rw-r--r-- | absl/debugging/internal/decode_rust_punycode.cc | 258 | ||||
-rw-r--r-- | absl/debugging/internal/decode_rust_punycode.h | 55 | ||||
-rw-r--r-- | absl/debugging/internal/decode_rust_punycode_test.cc | 606 | ||||
-rw-r--r-- | absl/debugging/internal/demangle.cc | 1143 | ||||
-rw-r--r-- | absl/debugging/internal/demangle.h | 3 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_rust.cc | 925 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_rust.h | 42 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_rust_test.cc | 584 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_test.cc | 1787 | ||||
-rw-r--r-- | absl/debugging/internal/elf_mem_image.cc | 59 | ||||
-rw-r--r-- | absl/debugging/internal/elf_mem_image.h | 15 | ||||
-rw-r--r-- | absl/debugging/internal/stacktrace_aarch64-inl.inc | 17 | ||||
-rw-r--r-- | absl/debugging/internal/utf8_for_code_point.cc | 70 | ||||
-rw-r--r-- | absl/debugging/internal/utf8_for_code_point.h | 47 | ||||
-rw-r--r-- | absl/debugging/internal/utf8_for_code_point_test.cc | 175 |
17 files changed, 5922 insertions, 116 deletions
diff --git a/absl/debugging/internal/bounded_utf8_length_sequence.h b/absl/debugging/internal/bounded_utf8_length_sequence.h new file mode 100644 index 00000000..188e06c4 --- /dev/null +++ b/absl/debugging/internal/bounded_utf8_length_sequence.h @@ -0,0 +1,126 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_ +#define ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_ + +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/numeric/bits.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// A sequence of up to max_elements integers between 1 and 4 inclusive, whose +// insertion operation computes the sum of all the elements before the insertion +// point. This is useful in decoding Punycode, where one needs to know where in +// a UTF-8 byte stream the n-th code point begins. +// +// BoundedUtf8LengthSequence is async-signal-safe and suitable for use in +// symbolizing stack traces in a signal handler, provided max_elements is not +// improvidently large. For inputs of lengths accepted by the Rust demangler, +// up to a couple hundred code points, InsertAndReturnSumOfPredecessors should +// run in a few dozen clock cycles, on par with the other arithmetic required +// for Punycode decoding. +template <uint32_t max_elements> +class BoundedUtf8LengthSequence { + public: + // Constructs an empty sequence. + BoundedUtf8LengthSequence() = default; + + // Inserts `utf_length` at position `index`, shifting any existing elements at + // or beyond `index` one position to the right. If the sequence is already + // full, the rightmost element is discarded. + // + // Returns the sum of the elements at positions 0 to `index - 1` inclusive. + // If `index` is greater than the number of elements already inserted, the + // excess positions in the range count 1 apiece. + // + // REQUIRES: index < max_elements and 1 <= utf8_length <= 4. + uint32_t InsertAndReturnSumOfPredecessors( + uint32_t index, uint32_t utf8_length) { + // The caller shouldn't pass out-of-bounds inputs, but if it does happen, + // clamp the values and try to continue. If we're being called from a + // signal handler, the last thing we want to do is crash. Emitting + // malformed UTF-8 is a lesser evil. + if (index >= max_elements) index = max_elements - 1; + if (utf8_length == 0 || utf8_length > 4) utf8_length = 1; + + const uint32_t word_index = index/32; + const uint32_t bit_index = 2 * (index % 32); + const uint64_t ones_bit = uint64_t{1} << bit_index; + + // Compute the sum of predecessors. + // - Each value from 1 to 4 is represented by a bit field with value from + // 0 to 3, so the desired sum is index plus the sum of the + // representations actually stored. + // - For each bit field, a set low bit should contribute 1 to the sum, and + // a set high bit should contribute 2. + // - Another way to say the same thing is that each set bit contributes 1, + // and each set high bit contributes an additional 1. + // - So the sum we want is index + popcount(everything) + popcount(bits in + // odd positions). + const uint64_t odd_bits_mask = 0xaaaaaaaaaaaaaaaa; + const uint64_t lower_seminibbles_mask = ones_bit - 1; + const uint64_t higher_seminibbles_mask = ~lower_seminibbles_mask; + const uint64_t same_word_bits_below_insertion = + rep_[word_index] & lower_seminibbles_mask; + int full_popcount = absl::popcount(same_word_bits_below_insertion); + int odd_popcount = + absl::popcount(same_word_bits_below_insertion & odd_bits_mask); + for (uint32_t j = word_index; j > 0; --j) { + const uint64_t word_below_insertion = rep_[j - 1]; + full_popcount += absl::popcount(word_below_insertion); + odd_popcount += absl::popcount(word_below_insertion & odd_bits_mask); + } + const uint32_t sum_of_predecessors = + index + static_cast<uint32_t>(full_popcount + odd_popcount); + + // Now insert utf8_length's representation, shifting successors up one + // place. + for (uint32_t j = max_elements/32 - 1; j > word_index; --j) { + rep_[j] = (rep_[j] << 2) | (rep_[j - 1] >> 62); + } + rep_[word_index] = + (rep_[word_index] & lower_seminibbles_mask) | + (uint64_t{utf8_length - 1} << bit_index) | + ((rep_[word_index] & higher_seminibbles_mask) << 2); + + return sum_of_predecessors; + } + + private: + // If the (32 * i + j)-th element of the represented sequence has the value k + // (0 <= j < 32, 1 <= k <= 4), then bits 2 * j and 2 * j + 1 of rep_[i] + // contain the seminibble (k - 1). + // + // In particular, the zero-initialization of rep_ makes positions not holding + // any inserted element count as 1 in InsertAndReturnSumOfPredecessors. + // + // Example: rep_ = {0xb1, ... the rest zeroes ...} represents the sequence + // (2, 1, 4, 3, ... the rest 1's ...). Constructing the sequence of Unicode + // code points "Àa🂻中" = {U+00C0, U+0061, U+1F0BB, U+4E2D} (among many + // other examples) would yield this value of rep_. + static_assert(max_elements > 0 && max_elements % 32 == 0, + "max_elements must be a positive multiple of 32"); + uint64_t rep_[max_elements/32] = {}; +}; + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_ diff --git a/absl/debugging/internal/bounded_utf8_length_sequence_test.cc b/absl/debugging/internal/bounded_utf8_length_sequence_test.cc new file mode 100644 index 00000000..17a24fd1 --- /dev/null +++ b/absl/debugging/internal/bounded_utf8_length_sequence_test.cc @@ -0,0 +1,126 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/bounded_utf8_length_sequence.h" + +#include <cstdint> + +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfOneCorrectly) { + BoundedUtf8LengthSequence<32> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0); + EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 1); +} + +TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfTwoCorrectly) { + BoundedUtf8LengthSequence<32> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0); + EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 2); +} + +TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfThreeCorrectly) { + BoundedUtf8LengthSequence<32> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 3), 0); + EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 3); +} + +TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfFourCorrectly) { + BoundedUtf8LengthSequence<32> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 4), 0); + EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 4); +} + +TEST(BoundedUtf8LengthSequenceTest, RemembersSeveralAppendedValues) { + BoundedUtf8LengthSequence<32> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 4), 1); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(2, 2), 5); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(3, 3), 7); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(4, 1), 10); +} + +TEST(BoundedUtf8LengthSequenceTest, RemembersSeveralPrependedValues) { + BoundedUtf8LengthSequence<32> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 4), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 3), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(4, 1), 10); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(3, 1), 6); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(2, 1), 3); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 1); +} + +TEST(BoundedUtf8LengthSequenceTest, RepeatedInsertsShiftValuesOutTheRightEnd) { + BoundedUtf8LengthSequence<32> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0); + for (uint32_t i = 1; i < 31; ++i) { + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0) + << "while moving the 2 into position " << i; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 1), 32) + << "after moving the 2 into position " << i; + } + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0) + << "while moving the 2 into position 31"; + EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 1), 31) + << "after moving the 2 into position 31"; +} + +TEST(BoundedUtf8LengthSequenceTest, InsertsIntoWord1LeaveWord0Untouched) { + BoundedUtf8LengthSequence<64> seq; + for (uint32_t i = 0; i < 32; ++i) { + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(i, 2), 2 * i) + << "at index " << i; + } + EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 64); + EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 64); +} + +TEST(BoundedUtf8LengthSequenceTest, InsertsIntoWord0ShiftValuesIntoWord1) { + BoundedUtf8LengthSequence<64> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(29, 2), 29); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(30, 3), 31); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 4), 34); + + // Pushing two 1's on the front moves the 3 and 4 into the high word. + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(34, 1), 31 + 2 + 3 + 4); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 31 + 2); +} + +TEST(BoundedUtf8LengthSequenceTest, ValuesAreShiftedCorrectlyAmongThreeWords) { + BoundedUtf8LengthSequence<96> seq; + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 3), 31); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(63, 4), 62 + 3); + + // This insertion moves both the 3 and the 4 up a word. + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(65, 1), 63 + 3 + 4); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(64, 1), 63 + 3); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(33, 1), 32 + 3); + ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 32); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/decode_rust_punycode.cc b/absl/debugging/internal/decode_rust_punycode.cc new file mode 100644 index 00000000..43b46bf9 --- /dev/null +++ b/absl/debugging/internal/decode_rust_punycode.cc @@ -0,0 +1,258 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/decode_rust_punycode.h" + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "absl/base/config.h" +#include "absl/base/nullability.h" +#include "absl/debugging/internal/bounded_utf8_length_sequence.h" +#include "absl/debugging/internal/utf8_for_code_point.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +namespace { + +// Decoding Punycode requires repeated random-access insertion into a stream of +// variable-length UTF-8 code-point encodings. We need this to be tolerably +// fast (no N^2 slowdown for unfortunate inputs), and we can't allocate any data +// structures on the heap (async-signal-safety). +// +// It is pragmatic to impose a moderately low limit on the identifier length and +// bail out if we ever hit it. Then BoundedUtf8LengthSequence efficiently +// determines where to insert the next code point, and memmove efficiently makes +// room for it. +// +// The chosen limit is a round number several times larger than identifiers +// expected in practice, yet still small enough that a memmove of this many +// UTF-8 characters is not much more expensive than the division and modulus +// operations that Punycode decoding requires. +constexpr uint32_t kMaxChars = 256; + +// Constants from RFC 3492 section 5. +constexpr uint32_t kBase = 36, kTMin = 1, kTMax = 26, kSkew = 38, kDamp = 700; + +constexpr uint32_t kMaxCodePoint = 0x10ffff; + +// Overflow threshold in DecodeRustPunycode's inner loop; see comments there. +constexpr uint32_t kMaxI = 1 << 30; + +// If punycode_begin .. punycode_end begins with a prefix matching the regular +// expression [0-9a-zA-Z_]+_, removes that prefix, copies all but the final +// underscore into out_begin .. out_end, sets num_ascii_chars to the number of +// bytes copied, and returns true. (A prefix of this sort represents the +// nonempty subsequence of ASCII characters in the corresponding plaintext.) +// +// If punycode_begin .. punycode_end does not contain an underscore, sets +// num_ascii_chars to zero and returns true. (The encoding of a plaintext +// without any ASCII characters does not carry such a prefix.) +// +// Returns false and zeroes num_ascii_chars on failure (either parse error or +// not enough space in the output buffer). +bool ConsumeOptionalAsciiPrefix(const char*& punycode_begin, + const char* const punycode_end, + char* const out_begin, + char* const out_end, + uint32_t& num_ascii_chars) { + num_ascii_chars = 0; + + // Remember the last underscore if any. Also use the same string scan to + // reject any ASCII bytes that do not belong in an identifier, including NUL, + // as well as non-ASCII bytes, which should have been delta-encoded instead. + int last_underscore = -1; + for (int i = 0; i < punycode_end - punycode_begin; ++i) { + const char c = punycode_begin[i]; + if (c == '_') { + last_underscore = i; + continue; + } + // We write out the meaning of absl::ascii_isalnum rather than call that + // function because its documentation does not promise it will remain + // async-signal-safe under future development. + if ('a' <= c && c <= 'z') continue; + if ('A' <= c && c <= 'Z') continue; + if ('0' <= c && c <= '9') continue; + return false; + } + + // If there was no underscore, that means there were no ASCII characters in + // the plaintext, so there is no prefix to consume. Our work is done. + if (last_underscore < 0) return true; + + // Otherwise there will be an underscore delimiter somewhere. It can't be + // initial because then there would be no ASCII characters to its left, and no + // delimiter would have been added in that case. + if (last_underscore == 0) return false; + + // Any other position is reasonable. Make sure there's room in the buffer. + if (last_underscore + 1 > out_end - out_begin) return false; + + // Consume and write out the ASCII characters. + num_ascii_chars = static_cast<uint32_t>(last_underscore); + std::memcpy(out_begin, punycode_begin, num_ascii_chars); + out_begin[num_ascii_chars] = '\0'; + punycode_begin += num_ascii_chars + 1; + return true; +} + +// Returns the value of `c` as a base-36 digit according to RFC 3492 section 5, +// or -1 if `c` is not such a digit. +int DigitValue(char c) { + if ('0' <= c && c <= '9') return c - '0' + 26; + if ('a' <= c && c <= 'z') return c - 'a'; + if ('A' <= c && c <= 'Z') return c - 'A'; + return -1; +} + +// Consumes the next delta encoding from punycode_begin .. punycode_end, +// updating i accordingly. Returns true on success. Returns false on parse +// failure or arithmetic overflow. +bool ScanNextDelta(const char*& punycode_begin, const char* const punycode_end, + uint32_t bias, uint32_t& i) { + uint64_t w = 1; // 64 bits to prevent overflow in w *= kBase - t + + // "for k = base to infinity in steps of base do begin ... end" in RFC 3492 + // section 6.2. Each loop iteration scans one digit of the delta. + for (uint32_t k = kBase; punycode_begin != punycode_end; k += kBase) { + const int digit_value = DigitValue(*punycode_begin++); + if (digit_value < 0) return false; + + // Compute this in 64-bit arithmetic so we can check for overflow afterward. + const uint64_t new_i = i + static_cast<uint64_t>(digit_value) * w; + + // Valid deltas are bounded by (#chars already emitted) * kMaxCodePoint, but + // invalid input could encode an arbitrarily large delta. Nip that in the + // bud here. + static_assert( + kMaxI >= kMaxChars * kMaxCodePoint, + "kMaxI is too small to prevent spurious failures on good input"); + if (new_i > kMaxI) return false; + + static_assert( + kMaxI < (uint64_t{1} << 32), + "Make kMaxI smaller or i 64 bits wide to prevent silent wraparound"); + i = static_cast<uint32_t>(new_i); + + // Compute the threshold that determines whether this is the last digit and + // (if not) what the next digit's place value will be. This logic from RFC + // 3492 section 6.2 is explained in section 3.3. + uint32_t t; + if (k <= bias + kTMin) { + t = kTMin; + } else if (k >= bias + kTMax) { + t = kTMax; + } else { + t = k - bias; + } + if (static_cast<uint32_t>(digit_value) < t) return true; + + // If this gets too large, the range check on new_i in the next iteration + // will catch it. We know this multiplication will not overwrap because w + // is 64 bits wide. + w *= kBase - t; + } + return false; +} + +} // namespace + +absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options) { + const char* punycode_begin = options.punycode_begin; + const char* const punycode_end = options.punycode_end; + char* const out_begin = options.out_begin; + char* const out_end = options.out_end; + + // Write a NUL terminator first. Later memcpy calls will keep bumping it + // along to its new right place. + const size_t out_size = static_cast<size_t>(out_end - out_begin); + if (out_size == 0) return nullptr; + *out_begin = '\0'; + + // RFC 3492 section 6.2 begins here. We retain the names of integer variables + // appearing in that text. + uint32_t n = 128, i = 0, bias = 72, num_chars = 0; + + // If there are any ASCII characters, consume them and their trailing + // underscore delimiter. + if (!ConsumeOptionalAsciiPrefix(punycode_begin, punycode_end, + out_begin, out_end, num_chars)) { + return nullptr; + } + uint32_t total_utf8_bytes = num_chars; + + BoundedUtf8LengthSequence<kMaxChars> utf8_lengths; + + // "while the input is not exhausted do begin ... end" + while (punycode_begin != punycode_end) { + if (num_chars >= kMaxChars) return nullptr; + + const uint32_t old_i = i; + + if (!ScanNextDelta(punycode_begin, punycode_end, bias, i)) return nullptr; + + // Update bias as in RFC 3492 section 6.1. (We have inlined adapt.) + uint32_t delta = i - old_i; + delta /= (old_i == 0 ? kDamp : 2); + delta += delta/(num_chars + 1); + bias = 0; + while (delta > ((kBase - kTMin) * kTMax)/2) { + delta /= kBase - kTMin; + bias += kBase; + } + bias += ((kBase - kTMin + 1) * delta)/(delta + kSkew); + + // Back in section 6.2, compute the new code point and insertion index. + static_assert( + kMaxI + kMaxCodePoint < (uint64_t{1} << 32), + "Make kMaxI smaller or n 64 bits wide to prevent silent wraparound"); + n += i/(num_chars + 1); + i %= num_chars + 1; + + // To actually insert, we need to convert the code point n to UTF-8 and the + // character index i to an index into the byte stream emitted so far. First + // prepare the UTF-8 encoding for n, rejecting surrogates, overlarge values, + // and anything that won't fit into the remaining output storage. + Utf8ForCodePoint utf8_for_code_point(n); + if (!utf8_for_code_point.ok()) return nullptr; + if (total_utf8_bytes + utf8_for_code_point.length + 1 > out_size) { + return nullptr; + } + + // Now insert the new character into both our length map and the output. + uint32_t n_index = + utf8_lengths.InsertAndReturnSumOfPredecessors( + i, utf8_for_code_point.length); + std::memmove( + out_begin + n_index + utf8_for_code_point.length, out_begin + n_index, + total_utf8_bytes + 1 - n_index); + std::memcpy(out_begin + n_index, utf8_for_code_point.bytes, + utf8_for_code_point.length); + total_utf8_bytes += utf8_for_code_point.length; + ++num_chars; + + // Finally, advance to the next state before continuing. + ++i; + } + + return out_begin + total_utf8_bytes; +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/decode_rust_punycode.h b/absl/debugging/internal/decode_rust_punycode.h new file mode 100644 index 00000000..0ae53ff3 --- /dev/null +++ b/absl/debugging/internal/decode_rust_punycode.h @@ -0,0 +1,55 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_ +#define ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_ + +#include "absl/base/config.h" +#include "absl/base/nullability.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +struct DecodeRustPunycodeOptions { + const char* punycode_begin; + const char* punycode_end; + char* out_begin; + char* out_end; +}; + +// Given Rust Punycode in `punycode_begin .. punycode_end`, writes the +// corresponding UTF-8 plaintext into `out_begin .. out_end`, followed by a NUL +// character, and returns a pointer to that final NUL on success. On failure +// returns a null pointer, and the contents of `out_begin .. out_end` are +// unspecified. +// +// Failure occurs in precisely these cases: +// - Any input byte does not match [0-9a-zA-Z_]. +// - The first input byte is an underscore, but no other underscore appears in +// the input. +// - The delta sequence does not represent a valid sequence of code-point +// insertions. +// - The plaintext would contain more than 256 code points. +// +// DecodeRustPunycode is async-signal-safe with bounded runtime and a small +// stack footprint, making it suitable for use in demangling Rust symbol names +// from a signal handler. +absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_ diff --git a/absl/debugging/internal/decode_rust_punycode_test.cc b/absl/debugging/internal/decode_rust_punycode_test.cc new file mode 100644 index 00000000..78d1c332 --- /dev/null +++ b/absl/debugging/internal/decode_rust_punycode_test.cc @@ -0,0 +1,606 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/decode_rust_punycode.h" + +#include <cstddef> +#include <cstring> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +using ::testing::AllOf; +using ::testing::Eq; +using ::testing::IsNull; +using ::testing::Pointee; +using ::testing::ResultOf; +using ::testing::StrEq; + +class DecodeRustPunycodeTest : public ::testing::Test { + protected: + void FillBufferWithNonzeroBytes() { + // The choice of nonzero value to fill with is arbitrary. The point is just + // to fail tests if DecodeRustPunycode forgets to write the final NUL + // character. + std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_)); + } + + DecodeRustPunycodeOptions WithAmpleSpace() { + FillBufferWithNonzeroBytes(); + + DecodeRustPunycodeOptions options; + options.punycode_begin = punycode_.data(); + options.punycode_end = punycode_.data() + punycode_.size(); + options.out_begin = buffer_storage_; + options.out_end = buffer_storage_ + sizeof(buffer_storage_); + return options; + } + + DecodeRustPunycodeOptions WithJustEnoughSpace() { + FillBufferWithNonzeroBytes(); + + const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1; + DecodeRustPunycodeOptions options; + options.punycode_begin = punycode_.data(); + options.punycode_end = punycode_.data() + punycode_.size(); + options.out_begin = buffer_storage_ + begin_offset; + options.out_end = buffer_storage_ + sizeof(buffer_storage_); + return options; + } + + DecodeRustPunycodeOptions WithOneByteTooFew() { + FillBufferWithNonzeroBytes(); + + const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size(); + DecodeRustPunycodeOptions options; + options.punycode_begin = punycode_.data(); + options.punycode_end = punycode_.data() + punycode_.size(); + options.out_begin = buffer_storage_ + begin_offset; + options.out_end = buffer_storage_ + sizeof(buffer_storage_); + return options; + } + + // Matches a correct return value of DecodeRustPunycode when `golden` is the + // expected plaintext output. + auto PointsToTheNulAfter(const std::string& golden) { + const size_t golden_size = golden.size(); + return AllOf( + Pointee(Eq('\0')), + ResultOf("preceding string body", + [golden_size](const char* p) { return p - golden_size; }, + StrEq(golden))); + } + + std::string punycode_; + std::string plaintext_; + char buffer_storage_[1024]; +}; + +TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) { + punycode_ = ""; + plaintext_ = ""; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, + StripsTheTrailingDelimiterFromAPureRunOfBasicChars) { + punycode_ = "foo_"; + plaintext_ = "foo"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) { + punycode_ = "foo_bar_"; + plaintext_ = "foo_bar"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) { + punycode_ = "_foo_"; + plaintext_ = "_foo"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) { + punycode_ = "_foo"; + + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) { + punycode_ = std::string("foo\0bar_", 8); + + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) { + punycode_ = "foo\007_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "foo-_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "foo;_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "foo\177_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) { + punycode_ = "\x80"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "\x80_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "\xff"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "\xff_"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RecognizesU0080) { + // a encodes 0, so the output is the smallest non-ASCII code point standing + // alone. (U+0080 PAD is not an identifier character, but DecodeRustPunycode + // does not check whether non-ASCII characters could belong to an identifier.) + punycode_ = "a"; + plaintext_ = "\xc2\x80"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) { + // Because bias = 72 for the first code point, any digit but a/A is nonfinal + // in one of the first two bytes of a delta sequence. + punycode_ = "b"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "z"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "0"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "9"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) { + punycode_ = "ba"; + plaintext_ = "\xc2\x81"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) { + punycode_ = "ca"; + plaintext_ = "\xc2\x82"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "za"; + plaintext_ = "\xc2\x99"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "0a"; + plaintext_ = "\xc2\x9a"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "1a"; + plaintext_ = "\xc2\x9b"; + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "9a"; + plaintext_ = "£"; // Pound sign, U+00A3 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); +} + +TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) { + punycode_ = "bb"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "zz"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "00"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); + + punycode_ = "99"; + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) { + punycode_ = "bba"; + plaintext_ = "¤"; // U+00A4 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "cba"; + plaintext_ = "¥"; // U+00A5 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "zba"; + plaintext_ = "¼"; // U+00BC + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "0ba"; + plaintext_ = "½"; // U+00BD + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "1ba"; + plaintext_ = "¾"; // U+00BE + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + + punycode_ = "9ba"; + plaintext_ = "Æ"; // U+00C6 + EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); +} + +// Tests beyond this point use characters allowed in identifiers, so you can +// prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test +// input and run it through another Rust demangler to verify that the +// corresponding golden output is correct. + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) { + punycode_ = "0ca"; + plaintext_ = "à"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) { + punycode_ = "_la_mode_yya"; + plaintext_ = "à_la_mode"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) { + punycode_ = "verre__vin_m4a"; + plaintext_ = "verre_à_vin"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) { + punycode_ = "belt_3na"; + plaintext_ = "beltà"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) { + punycode_ = "0caaaa"; + plaintext_ = "àààà"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) { + punycode_ = "3camsuz"; + plaintext_ = "ãéïôù"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) { + punycode_ = "3caltsx"; + plaintext_ = "ùéôãï"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) { + punycode_ = "fiq"; + plaintext_ = "中"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) { + punycode_ = "fiqaaaa"; + plaintext_ = "中中中中中"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) { + punycode_ = "fiq228c"; + plaintext_ = "中文"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) { + punycode_ = "fiq128c"; + plaintext_ = "文中"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) { + punycode_ = "uy7h"; + plaintext_ = "🂻"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) { + punycode_ = "jack__uh63d"; + plaintext_ = "jack_🂻"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) { + punycode_ = "jack__of_hearts_ki37n"; + plaintext_ = "jack_🂻_of_hearts"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) { + punycode_ = "_of_hearts_kz45i"; + plaintext_ = "🂻_of_hearts"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) { + punycode_ = "uy7haaaa"; + plaintext_ = "🂻🂻🂻🂻🂻"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) { + punycode_ = "8x7hcjmf"; + plaintext_ = "🂦🂧🂪🂭🂮"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) { + punycode_ = "8x7hcild"; + plaintext_ = "🂮🂦🂭🂪🂧"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) { + punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak"; + plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) { + punycode_ = "123456789a"; + + EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull()); +} + +// Finally, we test on a few prose and poetry snippets as a defense in depth. +// If our artificial short test inputs did not exercise a bug that is tickled by +// patterns typical of real human writing, maybe real human writing will catch +// that. +// +// These test inputs are extracted from texts old enough to be out of copyright +// that probe a variety of ranges of code-point space. All are longer than 32 +// code points, so they exercise the carrying of seminibbles from one uint64_t +// to the next higher one in BoundedUtf8LengthSequence. + +// The first three lines of the Old English epic _Beowulf_, mostly ASCII with a +// few archaic two-byte letters interspersed. +TEST_F(DecodeRustPunycodeTest, Beowulf) { + punycode_ = "hwt_we_gardena_in_geardagum_" + "eodcyninga_rym_gefrunon_" + "hu_a_elingas_ellen_fremedon_hxg9c70do9alau"; + plaintext_ = "hwæt_we_gardena_in_geardagum_" + "þeodcyninga_þrym_gefrunon_" + "hu_ða_æþelingas_ellen_fremedon"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +// The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然 +// (Meng Haoran), exercising three-byte-character processing. +TEST_F(DecodeRustPunycodeTest, MengHaoran) { + punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6" + "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta"; + plaintext_ = "故人具雞黍" "邀我至田家" + "綠樹村邊合" "青山郭外斜" + "開軒面場圃" "把酒話桑麻" + "待到重陽日" "還來就菊花"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +// A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura). +// Japanese mixes two-byte and three-byte characters: a good workout for codecs. +TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) { + punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0" + "em23do0op23cc2ff70mb6tae8aq759gja"; + plaintext_ = "瓜食めば" + "子ども思ほゆ" + "栗食めば" + "まして偲はゆ" + "何処より" + "来りしものそ" + "眼交に" + "もとな懸りて" + "安眠し寝さぬ"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +// The first two lines of the Phoenician-language inscription on the sarcophagus +// of Eshmunazar II of Sidon, 6th century BCE. Phoenician and many other +// archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000 +// through U+1FFFF) and thus exercise four-byte-character processing. +TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) { + punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee" + "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g"; + plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓" + "𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊" + "𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌" + "𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌" + "𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊" + "𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕"; + + ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()), + PointsToTheNulAfter(plaintext_)); + ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()), + PointsToTheNulAfter(plaintext_)); + EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull()); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/demangle.cc b/absl/debugging/internal/demangle.cc index 381a2b50..caac7636 100644 --- a/absl/debugging/internal/demangle.cc +++ b/absl/debugging/internal/demangle.cc @@ -14,18 +14,19 @@ // For reference check out: // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling -// -// Note that we only have partial C++11 support yet. #include "absl/debugging/internal/demangle.h" +#include <cstddef> #include <cstdint> #include <cstdio> #include <cstdlib> +#include <cstring> #include <limits> #include <string> #include "absl/base/config.h" +#include "absl/debugging/internal/demangle_rust.h" #if ABSL_INTERNAL_HAS_CXA_DEMANGLE #include <cxxabi.h> @@ -44,14 +45,16 @@ typedef struct { // List of operators from Itanium C++ ABI. static const AbbrevPair kOperatorList[] = { - // New has special syntax (not currently supported). + // New has special syntax. {"nw", "new", 0}, {"na", "new[]", 0}, - // Works except that the 'gs' prefix is not supported. + // Special-cased elsewhere to support the optional gs prefix. {"dl", "delete", 1}, {"da", "delete[]", 1}, + {"aw", "co_await", 1}, + {"ps", "+", 1}, // "positive" {"ng", "-", 1}, // "negative" {"ad", "&", 1}, // "address-of" @@ -79,6 +82,7 @@ static const AbbrevPair kOperatorList[] = { {"rs", ">>", 2}, {"lS", "<<=", 2}, {"rS", ">>=", 2}, + {"ss", "<=>", 2}, {"eq", "==", 2}, {"ne", "!=", 2}, {"lt", "<", 2}, @@ -98,6 +102,7 @@ static const AbbrevPair kOperatorList[] = { {"qu", "?", 3}, {"st", "sizeof", 0}, // Special syntax {"sz", "sizeof", 1}, // Not a real operator name, but used in expressions. + {"sZ", "sizeof...", 0}, // Special syntax {nullptr, nullptr, 0}, }; @@ -187,9 +192,50 @@ typedef struct { int recursion_depth; // For stack exhaustion prevention. int steps; // Cap how much work we'll do, regardless of depth. ParseState parse_state; // Backtrackable state copied for most frames. + + // Conditionally compiled support for marking the position of the first + // construct Demangle couldn't parse. This preprocessor symbol is intended + // for use by Abseil demangler maintainers only; its behavior is not part of + // Abseil's public interface. +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK + int high_water_mark; // Input position where parsing failed. + bool too_complex; // True if any guard.IsTooComplex() call returned true. +#endif } State; namespace { + +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK +void UpdateHighWaterMark(State *state) { + if (state->high_water_mark < state->parse_state.mangled_idx) { + state->high_water_mark = state->parse_state.mangled_idx; + } +} + +void ReportHighWaterMark(State *state) { + // Write out the mangled name with the trouble point marked, provided that the + // output buffer is large enough and the mangled name did not hit a complexity + // limit (in which case the high water mark wouldn't point out an unparsable + // construct, only the point where a budget ran out). + const size_t input_length = std::strlen(state->mangled_begin); + if (input_length + 6 > static_cast<size_t>(state->out_end_idx) || + state->too_complex) { + if (state->out_end_idx > 0) state->out[0] = '\0'; + return; + } + const size_t high_water_mark = static_cast<size_t>(state->high_water_mark); + std::memcpy(state->out, state->mangled_begin, high_water_mark); + std::memcpy(state->out + high_water_mark, "--!--", 5); + std::memcpy(state->out + high_water_mark + 5, + state->mangled_begin + high_water_mark, + input_length - high_water_mark); + state->out[input_length + 5] = '\0'; +} +#else +void UpdateHighWaterMark(State *) {} +void ReportHighWaterMark(State *) {} +#endif + // Prevent deep recursion / stack exhaustion. // Also prevent unbounded handling of complex inputs. class ComplexityGuard { @@ -201,7 +247,7 @@ class ComplexityGuard { ~ComplexityGuard() { --state_->recursion_depth; } // 256 levels of recursion seems like a reasonable upper limit on depth. - // 128 is not enough to demagle synthetic tests from demangle_unittest.txt: + // 128 is not enough to demangle synthetic tests from demangle_unittest.txt: // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..." static constexpr int kRecursionDepthLimit = 256; @@ -222,8 +268,14 @@ class ComplexityGuard { static constexpr int kParseStepsLimit = 1 << 17; bool IsTooComplex() const { - return state_->recursion_depth > kRecursionDepthLimit || - state_->steps > kParseStepsLimit; + if (state_->recursion_depth > kRecursionDepthLimit || + state_->steps > kParseStepsLimit) { +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK + state_->too_complex = true; +#endif + return true; + } + return false; } private: @@ -270,6 +322,10 @@ static void InitState(State* state, state->out_end_idx = static_cast<int>(out_size); state->recursion_depth = 0; state->steps = 0; +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK + state->high_water_mark = 0; + state->too_complex = false; +#endif state->parse_state.mangled_idx = 0; state->parse_state.out_cur_idx = 0; @@ -291,13 +347,14 @@ static bool ParseOneCharToken(State *state, const char one_char_token) { if (guard.IsTooComplex()) return false; if (RemainingInput(state)[0] == one_char_token) { ++state->parse_state.mangled_idx; + UpdateHighWaterMark(state); return true; } return false; } -// Returns true and advances "mangled_cur" if we find "two_char_token" -// at "mangled_cur" position. It is assumed that "two_char_token" does +// Returns true and advances "mangled_idx" if we find "two_char_token" +// at "mangled_idx" position. It is assumed that "two_char_token" does // not contain '\0'. static bool ParseTwoCharToken(State *state, const char *two_char_token) { ComplexityGuard guard(state); @@ -305,11 +362,45 @@ static bool ParseTwoCharToken(State *state, const char *two_char_token) { if (RemainingInput(state)[0] == two_char_token[0] && RemainingInput(state)[1] == two_char_token[1]) { state->parse_state.mangled_idx += 2; + UpdateHighWaterMark(state); return true; } return false; } +// Returns true and advances "mangled_idx" if we find "three_char_token" +// at "mangled_idx" position. It is assumed that "three_char_token" does +// not contain '\0'. +static bool ParseThreeCharToken(State *state, const char *three_char_token) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (RemainingInput(state)[0] == three_char_token[0] && + RemainingInput(state)[1] == three_char_token[1] && + RemainingInput(state)[2] == three_char_token[2]) { + state->parse_state.mangled_idx += 3; + UpdateHighWaterMark(state); + return true; + } + return false; +} + +// Returns true and advances "mangled_idx" if we find a copy of the +// NUL-terminated string "long_token" at "mangled_idx" position. +static bool ParseLongToken(State *state, const char *long_token) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + int i = 0; + for (; long_token[i] != '\0'; ++i) { + // Note that we cannot run off the end of the NUL-terminated input here. + // Inside the loop body, long_token[i] is known to be different from NUL. + // So if we read the NUL on the end of the input here, we return at once. + if (RemainingInput(state)[i] != long_token[i]) return false; + } + state->parse_state.mangled_idx += i; + UpdateHighWaterMark(state); + return true; +} + // Returns true and advances "mangled_cur" if we find any character in // "char_class" at "mangled_cur" position. static bool ParseCharClass(State *state, const char *char_class) { @@ -322,6 +413,7 @@ static bool ParseCharClass(State *state, const char *char_class) { for (; *p != '\0'; ++p) { if (RemainingInput(state)[0] == *p) { ++state->parse_state.mangled_idx; + UpdateHighWaterMark(state); return true; } } @@ -554,6 +646,7 @@ static bool ParseFloatNumber(State *state); static bool ParseSeqId(State *state); static bool ParseIdentifier(State *state, size_t length); static bool ParseOperatorName(State *state, int *arity); +static bool ParseConversionOperatorType(State *state); static bool ParseSpecialName(State *state); static bool ParseCallOffset(State *state); static bool ParseNVOffset(State *state); @@ -563,21 +656,33 @@ static bool ParseCtorDtorName(State *state); static bool ParseDecltype(State *state); static bool ParseType(State *state); static bool ParseCVQualifiers(State *state); +static bool ParseExtendedQualifier(State *state); static bool ParseBuiltinType(State *state); +static bool ParseVendorExtendedType(State *state); static bool ParseFunctionType(State *state); static bool ParseBareFunctionType(State *state); +static bool ParseOverloadAttribute(State *state); static bool ParseClassEnumType(State *state); static bool ParseArrayType(State *state); static bool ParsePointerToMemberType(State *state); static bool ParseTemplateParam(State *state); +static bool ParseTemplateParamDecl(State *state); static bool ParseTemplateTemplateParam(State *state); static bool ParseTemplateArgs(State *state); static bool ParseTemplateArg(State *state); static bool ParseBaseUnresolvedName(State *state); static bool ParseUnresolvedName(State *state); +static bool ParseUnresolvedQualifierLevel(State *state); +static bool ParseUnionSelector(State* state); +static bool ParseFunctionParam(State* state); +static bool ParseBracedExpression(State *state); static bool ParseExpression(State *state); +static bool ParseInitializer(State *state); static bool ParseExprPrimary(State *state); -static bool ParseExprCastValue(State *state); +static bool ParseExprCastValueAndTrailingE(State *state); +static bool ParseQRequiresClauseExpr(State *state); +static bool ParseRequirement(State *state); +static bool ParseTypeConstraint(State *state); static bool ParseLocalName(State *state); static bool ParseLocalNameSuffix(State *state); static bool ParseDiscriminator(State *state); @@ -622,22 +727,34 @@ static bool ParseMangledName(State *state) { } // <encoding> ::= <(function) name> <bare-function-type> +// [`Q` <requires-clause expr>] // ::= <(data) name> // ::= <special-name> +// +// NOTE: Based on http://shortn/_Hoq9qG83rx static bool ParseEncoding(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; - // Implementing the first two productions together as <name> - // [<bare-function-type>] avoids exponential blowup of backtracking. + // Since the first two productions both start with <name>, attempt + // to parse it only once to avoid exponential blowup of backtracking. // - // Since Optional(...) can't fail, there's no need to copy the state for - // backtracking. - if (ParseName(state) && Optional(ParseBareFunctionType(state))) { + // We're careful about exponential blowup because <encoding> recursively + // appears in other productions downstream of its first two productions, + // which means that every call to `ParseName` would possibly indirectly + // result in two calls to `ParseName` etc. + if (ParseName(state)) { + if (!ParseBareFunctionType(state)) { + return true; // <(data) name> + } + + // Parsed: <(function) name> <bare-function-type> + // Pending: [`Q` <requires-clause expr>] + ParseQRequiresClauseExpr(state); // restores state on failure return true; } if (ParseSpecialName(state)) { - return true; + return true; // <special-name> } return false; } @@ -723,19 +840,26 @@ static bool ParseNestedName(State *state) { // <prefix> ::= <prefix> <unqualified-name> // ::= <template-prefix> <template-args> // ::= <template-param> +// ::= <decltype> // ::= <substitution> // ::= # empty // <template-prefix> ::= <prefix> <(template) unqualified-name> // ::= <template-param> // ::= <substitution> +// ::= <vendor-extended-type> static bool ParsePrefix(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; bool has_something = false; while (true) { MaybeAppendSeparator(state); - if (ParseTemplateParam(state) || + if (ParseTemplateParam(state) || ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/true) || + // Although the official grammar does not mention it, nested-names + // shaped like Nu14__some_builtinIiE6memberE occur in practice, and it + // is not clear what else a compiler is supposed to do when a + // vendor-extended type has named members. + ParseVendorExtendedType(state) || ParseUnscopedName(state) || (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) { has_something = true; @@ -757,8 +881,14 @@ static bool ParsePrefix(State *state) { // ::= <source-name> [<abi-tags>] // ::= <local-source-name> [<abi-tags>] // ::= <unnamed-type-name> [<abi-tags>] +// ::= DC <source-name>+ E # C++17 structured binding +// ::= F <source-name> # C++20 constrained friend +// ::= F <operator-name> # C++20 constrained friend // // <local-source-name> is a GCC extension; see below. +// +// For the F notation for constrained friends, see +// https://github.com/itanium-cxx-abi/cxx-abi/issues/24#issuecomment-1491130332. static bool ParseUnqualifiedName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -767,6 +897,23 @@ static bool ParseUnqualifiedName(State *state) { ParseUnnamedTypeName(state)) { return ParseAbiTags(state); } + + // DC <source-name>+ E + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "DC") && OneOrMore(ParseSourceName, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // F <source-name> + // F <operator-name> + if (ParseOneCharToken(state, 'F') && MaybeAppend(state, "friend ") && + (ParseSourceName(state) || ParseOperatorName(state, nullptr))) { + return true; + } + state->parse_state = copy; + return false; } @@ -824,7 +971,11 @@ static bool ParseLocalSourceName(State *state) { // <unnamed-type-name> ::= Ut [<(nonnegative) number>] _ // ::= <closure-type-name> // <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _ -// <lambda-sig> ::= <(parameter) type>+ +// <lambda-sig> ::= <template-param-decl>* <(parameter) type>+ +// +// For <template-param-decl>* in <lambda-sig> see: +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/31 static bool ParseUnnamedTypeName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -847,6 +998,7 @@ static bool ParseUnnamedTypeName(State *state) { // Closure type. which = -1; if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) && + ZeroOrMore(ParseTemplateParamDecl, state) && OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) && ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) && which <= std::numeric_limits<int>::max() - 2 && // Don't overflow. @@ -888,6 +1040,7 @@ static bool ParseNumber(State *state, int *number_out) { } if (p != RemainingInput(state)) { // Conversion succeeded. state->parse_state.mangled_idx += p - RemainingInput(state); + UpdateHighWaterMark(state); if (number_out != nullptr) { // Note: possibly truncate "number". *number_out = static_cast<int>(number); @@ -910,6 +1063,7 @@ static bool ParseFloatNumber(State *state) { } if (p != RemainingInput(state)) { // Conversion succeeded. state->parse_state.mangled_idx += p - RemainingInput(state); + UpdateHighWaterMark(state); return true; } return false; @@ -928,6 +1082,7 @@ static bool ParseSeqId(State *state) { } if (p != RemainingInput(state)) { // Conversion succeeded. state->parse_state.mangled_idx += p - RemainingInput(state); + UpdateHighWaterMark(state); return true; } return false; @@ -946,11 +1101,13 @@ static bool ParseIdentifier(State *state, size_t length) { MaybeAppendWithLength(state, RemainingInput(state), length); } state->parse_state.mangled_idx += length; + UpdateHighWaterMark(state); return true; } // <operator-name> ::= nw, and other two letters cases // ::= cv <type> # (cast) +// ::= li <source-name> # C++11 user-defined literal // ::= v <digit> <source-name> # vendor extended operator static bool ParseOperatorName(State *state, int *arity) { ComplexityGuard guard(state); @@ -961,7 +1118,7 @@ static bool ParseOperatorName(State *state, int *arity) { // First check with "cv" (cast) case. ParseState copy = state->parse_state; if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") && - EnterNestedName(state) && ParseType(state) && + EnterNestedName(state) && ParseConversionOperatorType(state) && LeaveNestedName(state, copy.nest_level)) { if (arity != nullptr) { *arity = 1; @@ -970,6 +1127,13 @@ static bool ParseOperatorName(State *state, int *arity) { } state->parse_state = copy; + // Then user-defined literals. + if (ParseTwoCharToken(state, "li") && MaybeAppend(state, "operator\"\" ") && + ParseSourceName(state)) { + return true; + } + state->parse_state = copy; + // Then vendor extended operators. if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) && ParseSourceName(state)) { @@ -997,36 +1161,120 @@ static bool ParseOperatorName(State *state, int *arity) { } MaybeAppend(state, p->real_name); state->parse_state.mangled_idx += 2; + UpdateHighWaterMark(state); return true; } } return false; } +// <operator-name> ::= cv <type> # (cast) +// +// The name of a conversion operator is the one place where cv-qualifiers, *, &, +// and other simple type combinators are expected to appear in our stripped-down +// demangling (elsewhere they appear in function signatures or template +// arguments, which we omit from the output). We make reasonable efforts to +// render simple cases accurately. +static bool ParseConversionOperatorType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + // Scan pointers, const, and other easy mangling prefixes with postfix + // demanglings. Remember the range of input for later rescanning. + // + // See `ParseType` and the `switch` below for the meaning of each char. + const char* begin_simple_prefixes = RemainingInput(state); + while (ParseCharClass(state, "OPRCGrVK")) {} + const char* end_simple_prefixes = RemainingInput(state); + + // Emit the base type first. + if (!ParseType(state)) { + state->parse_state = copy; + return false; + } + + // Then rescan the easy type combinators in reverse order to emit their + // demanglings in the expected output order. + while (begin_simple_prefixes != end_simple_prefixes) { + switch (*--end_simple_prefixes) { + case 'P': + MaybeAppend(state, "*"); + break; + case 'R': + MaybeAppend(state, "&"); + break; + case 'O': + MaybeAppend(state, "&&"); + break; + case 'C': + MaybeAppend(state, " _Complex"); + break; + case 'G': + MaybeAppend(state, " _Imaginary"); + break; + case 'r': + MaybeAppend(state, " restrict"); + break; + case 'V': + MaybeAppend(state, " volatile"); + break; + case 'K': + MaybeAppend(state, " const"); + break; + } + } + return true; +} + // <special-name> ::= TV <type> // ::= TT <type> // ::= TI <type> // ::= TS <type> -// ::= TH <type> # thread-local +// ::= TW <name> # thread-local wrapper +// ::= TH <name> # thread-local initialization // ::= Tc <call-offset> <call-offset> <(base) encoding> // ::= GV <(object) name> +// ::= GR <(object) name> [<seq-id>] _ // ::= T <call-offset> <(base) encoding> +// ::= GTt <encoding> # transaction-safe entry point +// ::= TA <template-arg> # nontype template parameter object // G++ extensions: // ::= TC <type> <(offset) number> _ <(base) type> // ::= TF <type> // ::= TJ <type> -// ::= GR <name> +// ::= GR <name> # without final _, perhaps an earlier form? // ::= GA <encoding> // ::= Th <call-offset> <(base) encoding> // ::= Tv <call-offset> <(base) encoding> // -// Note: we don't care much about them since they don't appear in -// stack traces. The are special data. +// Note: Most of these are special data, not functions that occur in stack +// traces. Exceptions are TW and TH, which denote functions supporting the +// thread_local feature. For these see: +// +// https://maskray.me/blog/2021-02-14-all-about-thread-local-storage +// +// For TA see https://github.com/itanium-cxx-abi/cxx-abi/issues/63. static bool ParseSpecialName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; - if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") && + + if (ParseTwoCharToken(state, "TW")) { + MaybeAppend(state, "thread-local wrapper routine for "); + if (ParseName(state)) return true; + state->parse_state = copy; + return false; + } + + if (ParseTwoCharToken(state, "TH")) { + MaybeAppend(state, "thread-local initialization routine for "); + if (ParseName(state)) return true; + state->parse_state = copy; + return false; + } + + if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTIS") && ParseType(state)) { return true; } @@ -1064,21 +1312,51 @@ static bool ParseSpecialName(State *state) { } state->parse_state = copy; - if (ParseTwoCharToken(state, "GR") && ParseName(state)) { + // <special-name> ::= GR <(object) name> [<seq-id>] _ # modern standard + // ::= GR <(object) name> # also recognized + if (ParseTwoCharToken(state, "GR")) { + MaybeAppend(state, "reference temporary for "); + if (!ParseName(state)) { + state->parse_state = copy; + return false; + } + const bool has_seq_id = ParseSeqId(state); + const bool has_underscore = ParseOneCharToken(state, '_'); + if (has_seq_id && !has_underscore) { + state->parse_state = copy; + return false; + } return true; } - state->parse_state = copy; if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) { return true; } state->parse_state = copy; + if (ParseThreeCharToken(state, "GTt") && + MaybeAppend(state, "transaction clone for ") && ParseEncoding(state)) { + return true; + } + state->parse_state = copy; + if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") && ParseCallOffset(state) && ParseEncoding(state)) { return true; } state->parse_state = copy; + + if (ParseTwoCharToken(state, "TA")) { + bool append = state->parse_state.append; + DisableAppend(state); + if (ParseTemplateArg(state)) { + RestoreAppend(state, append); + MaybeAppend(state, "template parameter object"); + return true; + } + } + state->parse_state = copy; + return false; } @@ -1182,7 +1460,6 @@ static bool ParseDecltype(State *state) { // ::= O <type> # rvalue reference-to (C++0x) // ::= C <type> # complex pair (C 2000) // ::= G <type> # imaginary (C 2000) -// ::= U <source-name> <type> # vendor extended type qualifier // ::= <builtin-type> // ::= <function-type> // ::= <class-enum-type> # note: just an alias for <name> @@ -1193,7 +1470,9 @@ static bool ParseDecltype(State *state) { // ::= <decltype> // ::= <substitution> // ::= Dp <type> # pack expansion of (C++0x) -// ::= Dv <num-elems> _ # GNU vector extension +// ::= Dv <(elements) number> _ <type> # GNU vector extension +// ::= Dv <(bytes) expression> _ <type> +// ::= Dk <type-constraint> # constrained auto // static bool ParseType(State *state) { ComplexityGuard guard(state); @@ -1236,12 +1515,6 @@ static bool ParseType(State *state) { } state->parse_state = copy; - if (ParseOneCharToken(state, 'U') && ParseSourceName(state) && - ParseType(state)) { - return true; - } - state->parse_state = copy; - if (ParseBuiltinType(state) || ParseFunctionType(state) || ParseClassEnumType(state) || ParseArrayType(state) || ParsePointerToMemberType(state) || ParseDecltype(state) || @@ -1260,54 +1533,160 @@ static bool ParseType(State *state) { return true; } + // GNU vector extension Dv <number> _ <type> if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) && - ParseOneCharToken(state, '_')) { + ParseOneCharToken(state, '_') && ParseType(state)) { return true; } state->parse_state = copy; - return false; + // GNU vector extension Dv <expression> _ <type> + if (ParseTwoCharToken(state, "Dv") && ParseExpression(state) && + ParseOneCharToken(state, '_') && ParseType(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Dk") && ParseTypeConstraint(state)) { + return true; + } + state->parse_state = copy; + + // For this notation see CXXNameMangler::mangleType in Clang's source code. + // The relevant logic and its comment "not clear how to mangle this!" date + // from 2011, so it may be with us awhile. + return ParseLongToken(state, "_SUBSTPACK_"); } +// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers> // <CV-qualifiers> ::= [r] [V] [K] +// // We don't allow empty <CV-qualifiers> to avoid infinite loop in // ParseType(). static bool ParseCVQualifiers(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; int num_cv_qualifiers = 0; + while (ParseExtendedQualifier(state)) ++num_cv_qualifiers; num_cv_qualifiers += ParseOneCharToken(state, 'r'); num_cv_qualifiers += ParseOneCharToken(state, 'V'); num_cv_qualifiers += ParseOneCharToken(state, 'K'); return num_cv_qualifiers > 0; } +// <extended-qualifier> ::= U <source-name> [<template-args>] +static bool ParseExtendedQualifier(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + if (!ParseOneCharToken(state, 'U')) return false; + + bool append = state->parse_state.append; + DisableAppend(state); + if (!ParseSourceName(state)) { + state->parse_state = copy; + return false; + } + Optional(ParseTemplateArgs(state)); + RestoreAppend(state, append); + return true; +} + // <builtin-type> ::= v, etc. # single-character builtin types -// ::= u <source-name> +// ::= <vendor-extended-type> // ::= Dd, etc. # two-character builtin types +// ::= DB (<number> | <expression>) _ # _BitInt(N) +// ::= DU (<number> | <expression>) _ # unsigned _BitInt(N) +// ::= DF <number> _ # _FloatN (N bits) +// ::= DF <number> x # _FloatNx +// ::= DF16b # std::bfloat16_t // // Not supported: -// ::= DF <number> _ # _FloatN (N bits) -// +// ::= [DS] DA <fixed-point-size> +// ::= [DS] DR <fixed-point-size> +// because real implementations of N1169 fixed-point are scant. static bool ParseBuiltinType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; - const AbbrevPair *p; - for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) { + ParseState copy = state->parse_state; + + // DB (<number> | <expression>) _ # _BitInt(N) + // DU (<number> | <expression>) _ # unsigned _BitInt(N) + if (ParseTwoCharToken(state, "DB") || + (ParseTwoCharToken(state, "DU") && MaybeAppend(state, "unsigned "))) { + bool append = state->parse_state.append; + DisableAppend(state); + int number = -1; + if (!ParseNumber(state, &number) && !ParseExpression(state)) { + state->parse_state = copy; + return false; + } + RestoreAppend(state, append); + + if (!ParseOneCharToken(state, '_')) { + state->parse_state = copy; + return false; + } + + MaybeAppend(state, "_BitInt("); + if (number >= 0) { + MaybeAppendDecimal(state, number); + } else { + MaybeAppend(state, "?"); // the best we can do for dependent sizes + } + MaybeAppend(state, ")"); + return true; + } + + // DF <number> _ # _FloatN + // DF <number> x # _FloatNx + // DF16b # std::bfloat16_t + if (ParseTwoCharToken(state, "DF")) { + if (ParseThreeCharToken(state, "16b")) { + MaybeAppend(state, "std::bfloat16_t"); + return true; + } + int number = 0; + if (!ParseNumber(state, &number)) { + state->parse_state = copy; + return false; + } + MaybeAppend(state, "_Float"); + MaybeAppendDecimal(state, number); + if (ParseOneCharToken(state, 'x')) { + MaybeAppend(state, "x"); + return true; + } + if (ParseOneCharToken(state, '_')) return true; + state->parse_state = copy; + return false; + } + + for (const AbbrevPair *p = kBuiltinTypeList; p->abbrev != nullptr; ++p) { // Guaranteed only 1- or 2-character strings in kBuiltinTypeList. if (p->abbrev[1] == '\0') { if (ParseOneCharToken(state, p->abbrev[0])) { MaybeAppend(state, p->real_name); - return true; + return true; // ::= v, etc. # single-character builtin types } } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) { MaybeAppend(state, p->real_name); - return true; + return true; // ::= Dd, etc. # two-character builtin types } } + return ParseVendorExtendedType(state); +} + +// <vendor-extended-type> ::= u <source-name> [<template-args>] +static bool ParseVendorExtendedType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; - if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) { + if (ParseOneCharToken(state, 'u') && ParseSourceName(state) && + Optional(ParseTemplateArgs(state))) { return true; } state->parse_state = copy; @@ -1342,28 +1721,44 @@ static bool ParseExceptionSpec(State *state) { return false; } -// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E +// <function-type> ::= +// [exception-spec] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E +// +// <ref-qualifier> ::= R | O static bool ParseFunctionType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; - if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') && - Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) && - Optional(ParseOneCharToken(state, 'O')) && - ParseOneCharToken(state, 'E')) { - return true; + Optional(ParseExceptionSpec(state)); + Optional(ParseTwoCharToken(state, "Dx")); + if (!ParseOneCharToken(state, 'F')) { + state->parse_state = copy; + return false; } - state->parse_state = copy; - return false; + Optional(ParseOneCharToken(state, 'Y')); + if (!ParseBareFunctionType(state)) { + state->parse_state = copy; + return false; + } + Optional(ParseCharClass(state, "RO")); + if (!ParseOneCharToken(state, 'E')) { + state->parse_state = copy; + return false; + } + return true; } -// <bare-function-type> ::= <(signature) type>+ +// <bare-function-type> ::= <overload-attribute>* <(signature) type>+ +// +// The <overload-attribute>* prefix is nonstandard; see the comment on +// ParseOverloadAttribute. static bool ParseBareFunctionType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; DisableAppend(state); - if (OneOrMore(ParseType, state)) { + if (ZeroOrMore(ParseOverloadAttribute, state) && + OneOrMore(ParseType, state)) { RestoreAppend(state, copy.append); MaybeAppend(state, "()"); return true; @@ -1372,11 +1767,43 @@ static bool ParseBareFunctionType(State *state) { return false; } +// <overload-attribute> ::= Ua <name> +// +// The nonstandard <overload-attribute> production is sufficient to accept the +// current implementation of __attribute__((enable_if(condition, "message"))) +// and future attributes of a similar shape. See +// https://clang.llvm.org/docs/AttributeReference.html#enable-if and the +// definition of CXXNameMangler::mangleFunctionEncodingBareType in Clang's +// source code. +static bool ParseOverloadAttribute(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "Ua") && ParseName(state)) { + return true; + } + state->parse_state = copy; + return false; +} + // <class-enum-type> ::= <name> +// ::= Ts <name> # struct Name or class Name +// ::= Tu <name> # union Name +// ::= Te <name> # enum Name +// +// See http://shortn/_W3YrltiEd0. static bool ParseClassEnumType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; - return ParseName(state); + ParseState copy = state->parse_state; + if (Optional(ParseTwoCharToken(state, "Ts") || + ParseTwoCharToken(state, "Tu") || + ParseTwoCharToken(state, "Te")) && + ParseName(state)) { + return true; + } + state->parse_state = copy; + return false; } // <array-type> ::= A <(positive dimension) number> _ <(element) type> @@ -1413,21 +1840,83 @@ static bool ParsePointerToMemberType(State *state) { // <template-param> ::= T_ // ::= T <parameter-2 non-negative number> _ +// ::= TL <level-1> __ +// ::= TL <level-1> _ <parameter-2 non-negative number> _ static bool ParseTemplateParam(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; if (ParseTwoCharToken(state, "T_")) { MaybeAppend(state, "?"); // We don't support template substitutions. - return true; + return true; // ::= T_ } ParseState copy = state->parse_state; if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) && ParseOneCharToken(state, '_')) { MaybeAppend(state, "?"); // We don't support template substitutions. + return true; // ::= T <parameter-2 non-negative number> _ + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "TL") && ParseNumber(state, nullptr)) { + if (ParseTwoCharToken(state, "__")) { + MaybeAppend(state, "?"); // We don't support template substitutions. + return true; // ::= TL <level-1> __ + } + + if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) && + ParseOneCharToken(state, '_')) { + MaybeAppend(state, "?"); // We don't support template substitutions. + return true; // ::= TL <level-1> _ <parameter-2 non-negative number> _ + } + } + state->parse_state = copy; + return false; +} + +// <template-param-decl> +// ::= Ty # template type parameter +// ::= Tk <concept name> [<template-args>] # constrained type parameter +// ::= Tn <type> # template non-type parameter +// ::= Tt <template-param-decl>* E # template template parameter +// ::= Tp <template-param-decl> # template parameter pack +// +// NOTE: <concept name> is just a <name>: http://shortn/_MqJVyr0fc1 +// TODO(b/324066279): Implement optional suffix for `Tt`: +// [Q <requires-clause expr>] +static bool ParseTemplateParamDecl(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + if (ParseTwoCharToken(state, "Ty")) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tk") && ParseName(state) && + Optional(ParseTemplateArgs(state))) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tn") && ParseType(state)) { return true; } state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tt") && + ZeroOrMore(ParseTemplateParamDecl, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tp") && ParseTemplateParamDecl(state)) { + return true; + } + state->parse_state = copy; + return false; } @@ -1441,13 +1930,14 @@ static bool ParseTemplateTemplateParam(State *state) { ParseSubstitution(state, /*accept_std=*/false)); } -// <template-args> ::= I <template-arg>+ E +// <template-args> ::= I <template-arg>+ [Q <requires-clause expr>] E static bool ParseTemplateArgs(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; DisableAppend(state); if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) && + Optional(ParseQRequiresClauseExpr(state)) && ParseOneCharToken(state, 'E')) { RestoreAppend(state, copy.append); MaybeAppend(state, "<>"); @@ -1457,7 +1947,8 @@ static bool ParseTemplateArgs(State *state) { return false; } -// <template-arg> ::= <type> +// <template-arg> ::= <template-param-decl> <template-arg> +// ::= <type> // ::= <expr-primary> // ::= J <template-arg>* E # argument pack // ::= X <expression> E @@ -1541,7 +2032,7 @@ static bool ParseTemplateArg(State *state) { // ::= L <source-name> [<template-args>] [<expr-cast-value> E] if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) { copy = state->parse_state; - if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) { + if (ParseExprCastValueAndTrailingE(state)) { return true; } state->parse_state = copy; @@ -1560,6 +2051,12 @@ static bool ParseTemplateArg(State *state) { return true; } state->parse_state = copy; + + if (ParseTemplateParamDecl(state) && ParseTemplateArg(state)) { + return true; + } + state->parse_state = copy; + return false; } @@ -1614,6 +2111,13 @@ static bool ParseBaseUnresolvedName(State *state) { // <base-unresolved-name> // ::= [gs] sr <unresolved-qualifier-level>+ E // <base-unresolved-name> +// ::= sr St <simple-id> <simple-id> # nonstandard +// +// The last case is not part of the official grammar but has been observed in +// real-world examples that the GNU demangler (but not the LLVM demangler) is +// able to decode; see demangle_test.cc for one such symbol name. The shape +// sr St <simple-id> <simple-id> was inferred by closed-box testing of the GNU +// demangler. static bool ParseUnresolvedName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -1633,7 +2137,7 @@ static bool ParseUnresolvedName(State *state) { if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') && ParseUnresolvedType(state) && - OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) && + OneOrMore(ParseUnresolvedQualifierLevel, state) && ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { return true; } @@ -1641,35 +2145,160 @@ static bool ParseUnresolvedName(State *state) { if (Optional(ParseTwoCharToken(state, "gs")) && ParseTwoCharToken(state, "sr") && - OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) && + OneOrMore(ParseUnresolvedQualifierLevel, state) && ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { return true; } state->parse_state = copy; + if (ParseTwoCharToken(state, "sr") && ParseTwoCharToken(state, "St") && + ParseSimpleId(state) && ParseSimpleId(state)) { + return true; + } + state->parse_state = copy; + return false; } +// <unresolved-qualifier-level> ::= <simple-id> +// ::= <substitution> <template-args> +// +// The production <substitution> <template-args> is nonstandard but is observed +// in practice. An upstream discussion on the best shape of <unresolved-name> +// has not converged: +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/38 +static bool ParseUnresolvedQualifierLevel(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + if (ParseSimpleId(state)) return true; + + ParseState copy = state->parse_state; + if (ParseSubstitution(state, /*accept_std=*/false) && + ParseTemplateArgs(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <union-selector> ::= _ [<number>] +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/47 +static bool ParseUnionSelector(State *state) { + return ParseOneCharToken(state, '_') && Optional(ParseNumber(state, nullptr)); +} + +// <function-param> ::= fp <(top-level) CV-qualifiers> _ +// ::= fp <(top-level) CV-qualifiers> <number> _ +// ::= fL <number> p <(top-level) CV-qualifiers> _ +// ::= fL <number> p <(top-level) CV-qualifiers> <number> _ +// ::= fpT # this +static bool ParseFunctionParam(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + + // Function-param expression (level 0). + if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) && + Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + // Function-param expression (level 1+). + if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) && + ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) && + Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + return ParseThreeCharToken(state, "fpT"); +} + +// <braced-expression> ::= <expression> +// ::= di <field source-name> <braced-expression> +// ::= dx <index expression> <braced-expression> +// ::= dX <expression> <expression> <braced-expression> +static bool ParseBracedExpression(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + + if (ParseTwoCharToken(state, "di") && ParseSourceName(state) && + ParseBracedExpression(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "dx") && ParseExpression(state) && + ParseBracedExpression(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "dX") && + ParseExpression(state) && ParseExpression(state) && + ParseBracedExpression(state)) { + return true; + } + state->parse_state = copy; + + return ParseExpression(state); +} + // <expression> ::= <1-ary operator-name> <expression> // ::= <2-ary operator-name> <expression> <expression> // ::= <3-ary operator-name> <expression> <expression> <expression> +// ::= pp_ <expression> # ++e; pp <expression> is e++ +// ::= mm_ <expression> # --e; mm <expression> is e-- // ::= cl <expression>+ E // ::= cp <simple-id> <expression>* E # Clang-specific. +// ::= so <type> <expression> [<number>] <union-selector>* [p] E // ::= cv <type> <expression> # type (expression) // ::= cv <type> _ <expression>* E # type (expr-list) +// ::= tl <type> <braced-expression>* E +// ::= il <braced-expression>* E +// ::= [gs] nw <expression>* _ <type> E +// ::= [gs] nw <expression>* _ <type> <initializer> +// ::= [gs] na <expression>* _ <type> E +// ::= [gs] na <expression>* _ <type> <initializer> +// ::= [gs] dl <expression> +// ::= [gs] da <expression> +// ::= dc <type> <expression> +// ::= sc <type> <expression> +// ::= cc <type> <expression> +// ::= rc <type> <expression> +// ::= ti <type> +// ::= te <expression> // ::= st <type> +// ::= at <type> +// ::= az <expression> +// ::= nx <expression> // ::= <template-param> // ::= <function-param> +// ::= sZ <template-param> +// ::= sZ <function-param> +// ::= sP <template-arg>* E // ::= <expr-primary> // ::= dt <expression> <unresolved-name> # expr.name // ::= pt <expression> <unresolved-name> # expr->name // ::= sp <expression> # argument pack expansion +// ::= fl <binary operator-name> <expression> +// ::= fr <binary operator-name> <expression> +// ::= fL <binary operator-name> <expression> <expression> +// ::= fR <binary operator-name> <expression> <expression> +// ::= tw <expression> +// ::= tr // ::= sr <type> <unqualified-name> <template-args> // ::= sr <type> <unqualified-name> -// <function-param> ::= fp <(top-level) CV-qualifiers> _ -// ::= fp <(top-level) CV-qualifiers> <number> _ -// ::= fL <number> p <(top-level) CV-qualifiers> _ -// ::= fL <number> p <(top-level) CV-qualifiers> <number> _ +// ::= u <source-name> <template-arg>* E # vendor extension +// ::= rq <requirement>+ E +// ::= rQ <bare-function-type> _ <requirement>+ E static bool ParseExpression(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -1686,6 +2315,15 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; + // Preincrement and predecrement. Postincrement and postdecrement are handled + // by the operator-name logic later on. + if ((ParseThreeCharToken(state, "pp_") || + ParseThreeCharToken(state, "mm_")) && + ParseExpression(state)) { + return true; + } + state->parse_state = copy; + // Clang-specific "cp <simple-id> <expression>* E" // https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338 if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) && @@ -1694,17 +2332,65 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; - // Function-param expression (level 0). - if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) && - Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + // <expression> ::= so <type> <expression> [<number>] <union-selector>* [p] E + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/47 + if (ParseTwoCharToken(state, "so") && ParseType(state) && + ParseExpression(state) && Optional(ParseNumber(state, nullptr)) && + ZeroOrMore(ParseUnionSelector, state) && + Optional(ParseOneCharToken(state, 'p')) && + ParseOneCharToken(state, 'E')) { return true; } state->parse_state = copy; - // Function-param expression (level 1+). - if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) && - ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) && - Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + // <expression> ::= <function-param> + if (ParseFunctionParam(state)) return true; + state->parse_state = copy; + + // <expression> ::= tl <type> <braced-expression>* E + if (ParseTwoCharToken(state, "tl") && ParseType(state) && + ZeroOrMore(ParseBracedExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= il <braced-expression>* E + if (ParseTwoCharToken(state, "il") && + ZeroOrMore(ParseBracedExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= [gs] nw <expression>* _ <type> E + // ::= [gs] nw <expression>* _ <type> <initializer> + // ::= [gs] na <expression>* _ <type> E + // ::= [gs] na <expression>* _ <type> <initializer> + if (Optional(ParseTwoCharToken(state, "gs")) && + (ParseTwoCharToken(state, "nw") || ParseTwoCharToken(state, "na")) && + ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, '_') && + ParseType(state) && + (ParseOneCharToken(state, 'E') || ParseInitializer(state))) { + return true; + } + state->parse_state = copy; + + // <expression> ::= [gs] dl <expression> + // ::= [gs] da <expression> + if (Optional(ParseTwoCharToken(state, "gs")) && + (ParseTwoCharToken(state, "dl") || ParseTwoCharToken(state, "da")) && + ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // dynamic_cast, static_cast, const_cast, reinterpret_cast. + // + // <expression> ::= (dc | sc | cc | rc) <type> <expression> + if (ParseCharClass(state, "dscr") && ParseOneCharToken(state, 'c') && + ParseType(state) && ParseExpression(state)) { return true; } state->parse_state = copy; @@ -1746,15 +2432,96 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; + // typeid(type) + if (ParseTwoCharToken(state, "ti") && ParseType(state)) { + return true; + } + state->parse_state = copy; + + // typeid(expression) + if (ParseTwoCharToken(state, "te") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + // sizeof type if (ParseTwoCharToken(state, "st") && ParseType(state)) { return true; } state->parse_state = copy; + // alignof(type) + if (ParseTwoCharToken(state, "at") && ParseType(state)) { + return true; + } + state->parse_state = copy; + + // alignof(expression), a GNU extension + if (ParseTwoCharToken(state, "az") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // noexcept(expression) appearing as an expression in a dependent signature + if (ParseTwoCharToken(state, "nx") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // sizeof...(pack) + // + // <expression> ::= sZ <template-param> + // ::= sZ <function-param> + if (ParseTwoCharToken(state, "sZ") && + (ParseFunctionParam(state) || ParseTemplateParam(state))) { + return true; + } + state->parse_state = copy; + + // sizeof...(pack) captured from an alias template + // + // <expression> ::= sP <template-arg>* E + if (ParseTwoCharToken(state, "sP") && ZeroOrMore(ParseTemplateArg, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // Unary folds (... op pack) and (pack op ...). + // + // <expression> ::= fl <binary operator-name> <expression> + // ::= fr <binary operator-name> <expression> + if ((ParseTwoCharToken(state, "fl") || ParseTwoCharToken(state, "fr")) && + ParseOperatorName(state, nullptr) && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // Binary folds (init op ... op pack) and (pack op ... op init). + // + // <expression> ::= fL <binary operator-name> <expression> <expression> + // ::= fR <binary operator-name> <expression> <expression> + if ((ParseTwoCharToken(state, "fL") || ParseTwoCharToken(state, "fR")) && + ParseOperatorName(state, nullptr) && ParseExpression(state) && + ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // tw <expression>: throw e + if (ParseTwoCharToken(state, "tw") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // tr: throw (rethrows an exception from the handler that caught it) + if (ParseTwoCharToken(state, "tr")) return true; + // Object and pointer member access expressions. + // + // <expression> ::= (dt | pt) <expression> <unresolved-name> if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) && - ParseExpression(state) && ParseType(state)) { + ParseExpression(state) && ParseUnresolvedName(state)) { return true; } state->parse_state = copy; @@ -1774,9 +2541,61 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; + // Vendor extended expressions + if (ParseOneCharToken(state, 'u') && ParseSourceName(state) && + ZeroOrMore(ParseTemplateArg, state) && ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= rq <requirement>+ E + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/24 + if (ParseTwoCharToken(state, "rq") && OneOrMore(ParseRequirement, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= rQ <bare-function-type> _ <requirement>+ E + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/24 + if (ParseTwoCharToken(state, "rQ") && ParseBareFunctionType(state) && + ParseOneCharToken(state, '_') && OneOrMore(ParseRequirement, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return ParseUnresolvedName(state); } +// <initializer> ::= pi <expression>* E +// ::= il <braced-expression>* E +// +// The il ... E form is not in the ABI spec but is seen in practice for +// braced-init-lists in new-expressions, which are standard syntax from C++11 +// on. +static bool ParseInitializer(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + if (ParseTwoCharToken(state, "pi") && ZeroOrMore(ParseExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "il") && + ZeroOrMore(ParseBracedExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return false; +} + // <expr-primary> ::= L <type> <(value) number> E // ::= L <type> <(value) float> E // ::= L <mangled-name> E @@ -1819,10 +2638,35 @@ static bool ParseExprPrimary(State *state) { return false; } - // The merged cast production. - if (ParseOneCharToken(state, 'L') && ParseType(state) && - ParseExprCastValue(state)) { - return true; + if (ParseOneCharToken(state, 'L')) { + // There are two special cases in which a literal may or must contain a type + // without a value. The first is that both LDnE and LDn0E are valid + // encodings of nullptr, used in different situations. Recognize LDnE here, + // leaving LDn0E to be recognized by the general logic afterward. + if (ParseThreeCharToken(state, "DnE")) return true; + + // The second special case is a string literal, currently mangled in C++98 + // style as LA<length + 1>_KcE. This is inadequate to support C++11 and + // later versions, and the discussion of this problem has not converged. + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/64 + // + // For now the bare-type mangling is what's used in practice, so we + // recognize this form and only this form if an array type appears here. + // Someday we'll probably have to accept a new form of value mangling in + // LA...E constructs. (Note also that C++20 allows a wide range of + // class-type objects as template arguments, so someday their values will be + // mangled and we'll have to recognize them here too.) + if (RemainingInput(state)[0] == 'A' /* an array type follows */) { + if (ParseType(state) && ParseOneCharToken(state, 'E')) return true; + state->parse_state = copy; + return false; + } + + // The merged cast production. + if (ParseType(state) && ParseExprCastValueAndTrailingE(state)) { + return true; + } } state->parse_state = copy; @@ -1836,7 +2680,7 @@ static bool ParseExprPrimary(State *state) { } // <number> or <float>, followed by 'E', as described above ParseExprPrimary. -static bool ParseExprCastValue(State *state) { +static bool ParseExprCastValueAndTrailingE(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; // We have to be able to backtrack after accepting a number because we could @@ -1848,39 +2692,148 @@ static bool ParseExprCastValue(State *state) { } state->parse_state = copy; - if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) { + if (ParseFloatNumber(state)) { + // <float> for ordinary floating-point types + if (ParseOneCharToken(state, 'E')) return true; + + // <float> _ <float> for complex floating-point types + if (ParseOneCharToken(state, '_') && ParseFloatNumber(state) && + ParseOneCharToken(state, 'E')) { + return true; + } + } + state->parse_state = copy; + + return false; +} + +// Parses `Q <requires-clause expr>`. +// If parsing fails, applies backtracking to `state`. +// +// This function covers two symbols instead of one for convenience, +// because in LLVM's Itanium ABI mangling grammar, <requires-clause expr> +// always appears after Q. +// +// Does not emit the parsed `requires` clause to simplify the implementation. +// In other words, these two functions' mangled names will demangle identically: +// +// template <typename T> +// int foo(T) requires IsIntegral<T>; +// +// vs. +// +// template <typename T> +// int foo(T); +static bool ParseQRequiresClauseExpr(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + DisableAppend(state); + + // <requires-clause expr> is just an <expression>: http://shortn/_9E1Ul0rIM8 + if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) { + RestoreAppend(state, copy.append); + return true; + } + + // also restores append + state->parse_state = copy; + return false; +} + +// <requirement> ::= X <expression> [N] [R <type-constraint>] +// <requirement> ::= T <type> +// <requirement> ::= Q <constraint-expression> +// +// <constraint-expression> ::= <expression> +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/24 +static bool ParseRequirement(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + + if (ParseOneCharToken(state, 'X') && ParseExpression(state) && + Optional(ParseOneCharToken(state, 'N')) && + // This logic backtracks cleanly if we eat an R but a valid type doesn't + // follow it. + (!ParseOneCharToken(state, 'R') || ParseTypeConstraint(state))) { return true; } state->parse_state = copy; + if (ParseOneCharToken(state, 'T') && ParseType(state)) return true; + state->parse_state = copy; + + if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) return true; + state->parse_state = copy; + return false; } +// <type-constraint> ::= <name> +static bool ParseTypeConstraint(State *state) { + return ParseName(state); +} + // <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>] // ::= Z <(function) encoding> E s [<discriminator>] +// ::= Z <(function) encoding> E d [<(parameter) number>] _ <name> // // Parsing a common prefix of these two productions together avoids an // exponential blowup of backtracking. Parse like: // <local-name> := Z <encoding> E <local-name-suffix> // <local-name-suffix> ::= s [<discriminator>] +// ::= d [<(parameter) number>] _ <name> // ::= <name> [<discriminator>] static bool ParseLocalNameSuffix(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + // <local-name-suffix> ::= d [<(parameter) number>] _ <name> + if (ParseOneCharToken(state, 'd') && + (IsDigit(RemainingInput(state)[0]) || RemainingInput(state)[0] == '_')) { + int number = -1; + Optional(ParseNumber(state, &number)); + if (number < -1 || number > 2147483645) { + // Work around overflow cases. We do not expect these outside of a fuzzer + // or other source of adversarial input. If we do detect overflow here, + // we'll print {default arg#1}. + number = -1; + } + number += 2; + + // The ::{default arg#1}:: infix must be rendered before the lambda itself, + // so print this before parsing the rest of the <local-name-suffix>. + MaybeAppend(state, "::{default arg#"); + MaybeAppendDecimal(state, number); + MaybeAppend(state, "}::"); + if (ParseOneCharToken(state, '_') && ParseName(state)) return true; + + // On late parse failure, roll back not only the input but also the output, + // whose trailing NUL was overwritten. + state->parse_state = copy; + if (state->parse_state.append) { + state->out[state->parse_state.out_cur_idx] = '\0'; + } + return false; + } + state->parse_state = copy; + // <local-name-suffix> ::= <name> [<discriminator>] if (MaybeAppend(state, "::") && ParseName(state) && Optional(ParseDiscriminator(state))) { return true; } - - // Since we're not going to overwrite the above "::" by re-parsing the - // <encoding> (whose trailing '\0' byte was in the byte now holding the - // first ':'), we have to rollback the "::" if the <name> parse failed. + state->parse_state = copy; if (state->parse_state.append) { - state->out[state->parse_state.out_cur_idx - 2] = '\0'; + state->out[state->parse_state.out_cur_idx] = '\0'; } + // <local-name-suffix> ::= s [<discriminator>] return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state)); } @@ -1896,12 +2849,22 @@ static bool ParseLocalName(State *state) { return false; } -// <discriminator> := _ <(non-negative) number> +// <discriminator> := _ <digit> +// := __ <number (>= 10)> _ static bool ParseDiscriminator(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; - if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) { + + // Both forms start with _ so parse that first. + if (!ParseOneCharToken(state, '_')) return false; + + // <digit> + if (ParseDigit(state, nullptr)) return true; + + // _ <number> _ + if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) && + ParseOneCharToken(state, '_')) { return true; } state->parse_state = copy; @@ -1947,6 +2910,7 @@ static bool ParseSubstitution(State *state, bool accept_std) { MaybeAppend(state, p->real_name); } ++state->parse_state.mangled_idx; + UpdateHighWaterMark(state); return true; } } @@ -1972,10 +2936,13 @@ static bool ParseTopLevelMangledName(State *state) { MaybeAppend(state, RemainingInput(state)); return true; } + ReportHighWaterMark(state); return false; // Unconsumed suffix. } return true; } + + ReportHighWaterMark(state); return false; } @@ -1985,6 +2952,10 @@ static bool Overflowed(const State *state) { // The demangler entry point. bool Demangle(const char* mangled, char* out, size_t out_size) { + if (mangled[0] == '_' && mangled[1] == 'R') { + return DemangleRustSymbolEncoding(mangled, out, out_size); + } + State state; InitState(&state, mangled, out, out_size); return ParseTopLevelMangledName(&state) && !Overflowed(&state) && diff --git a/absl/debugging/internal/demangle.h b/absl/debugging/internal/demangle.h index 146d1150..cb0aba13 100644 --- a/absl/debugging/internal/demangle.h +++ b/absl/debugging/internal/demangle.h @@ -56,6 +56,9 @@ namespace debugging_internal { // // See the unit test for more examples. // +// Demangle also recognizes Rust mangled names by delegating the parsing of +// anything that starts with _R to DemangleRustSymbolEncoding (demangle_rust.h). +// // Note: we might want to write demanglers for ABIs other than Itanium // C++ ABI in the future. bool Demangle(const char* mangled, char* out, size_t out_size); diff --git a/absl/debugging/internal/demangle_rust.cc b/absl/debugging/internal/demangle_rust.cc new file mode 100644 index 00000000..4309bd84 --- /dev/null +++ b/absl/debugging/internal/demangle_rust.cc @@ -0,0 +1,925 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/demangle_rust.h" + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <limits> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/debugging/internal/decode_rust_punycode.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +namespace { + +// Same step limit as the C++ demangler in demangle.cc uses. +constexpr int kMaxReturns = 1 << 17; + +bool IsDigit(char c) { return '0' <= c && c <= '9'; } +bool IsLower(char c) { return 'a' <= c && c <= 'z'; } +bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; } +bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); } +bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; } +bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); } + +const char* BasicTypeName(char c) { + switch (c) { + case 'a': return "i8"; + case 'b': return "bool"; + case 'c': return "char"; + case 'd': return "f64"; + case 'e': return "str"; + case 'f': return "f32"; + case 'h': return "u8"; + case 'i': return "isize"; + case 'j': return "usize"; + case 'l': return "i32"; + case 'm': return "u32"; + case 'n': return "i128"; + case 'o': return "u128"; + case 'p': return "_"; + case 's': return "i16"; + case 't': return "u16"; + case 'u': return "()"; + case 'v': return "..."; + case 'x': return "i64"; + case 'y': return "u64"; + case 'z': return "!"; + } + return nullptr; +} + +// Parser for Rust symbol mangling v0, whose grammar is defined here: +// +// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary +class RustSymbolParser { + public: + // Prepares to demangle the given encoding, a Rust symbol name starting with + // _R, into the output buffer [out, out_end). The caller is expected to + // continue by calling the new object's Parse function. + RustSymbolParser(const char* encoding, char* out, char* const out_end) + : encoding_(encoding), out_(out), out_end_(out_end) { + if (out_ != out_end_) *out_ = '\0'; + } + + // Parses the constructor's encoding argument, writing output into the range + // [out, out_end). Returns true on success and false for input whose + // structure was not recognized or exceeded implementation limits, such as by + // nesting structures too deep. In either case *this should not be used + // again. + ABSL_MUST_USE_RESULT bool Parse() && { + // Recursively parses the grammar production named by callee, then resumes + // execution at the next statement. + // + // Recursive-descent parsing is a beautifully readable translation of a + // grammar, but it risks stack overflow if implemented by naive recursion on + // the C++ call stack. So we simulate recursion by goto and switch instead, + // keeping a bounded stack of "return addresses" in the recursion_stack_ + // member. + // + // The callee argument is a statement label. We goto that label after + // saving the "return address" on recursion_stack_. The next continue + // statement in the for loop below "returns" from this "call". + // + // The caller argument names the return point. Each value of caller must + // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the + // definition of enum ReturnAddress. The switch implements the control + // transfer from the end of a "called" subroutine back to the statement + // after the "call". + // + // Note that not all the grammar productions have to be packed into the + // switch, but only those which appear in a cycle in the grammar. Anything + // acyclic can be written as ordinary functions and function calls, e.g., + // ParseIdentifier. +#define ABSL_DEMANGLER_RECURSE(callee, caller) \ + do { \ + if (recursion_depth_ == kStackSize) return false; \ + /* The next continue will switch on this saved value ... */ \ + recursion_stack_[recursion_depth_++] = caller; \ + goto callee; \ + /* ... and will land here, resuming the suspended code. */ \ + case caller: {} \ + } while (0) + + // Parse the encoding, counting completed recursive calls to guard against + // excessively complex input and infinite-loop bugs. + int iter = 0; + goto whole_encoding; + for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) { + // This switch resumes the code path most recently suspended by + // ABSL_DEMANGLER_RECURSE. + switch (recursion_stack_[--recursion_depth_]) { + // + // symbol-name -> + // _R decimal-number? path instantiating-crate? vendor-specific-suffix? + whole_encoding: + if (!Eat('_') || !Eat('R')) return false; + // decimal-number? is always empty today, so proceed to path, which + // can't start with a decimal digit. + ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate); + if (IsAlpha(Peek())) { + ++silence_depth_; // Print nothing more from here on. + ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix); + } + switch (Take()) { + case '.': case '$': case '\0': return true; + } + return false; // unexpected trailing content + + // path -> crate-root | inherent-impl | trait-impl | trait-definition | + // nested-path | generic-args | backref + // + // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch + // (which would hide the generated case label). Thus we jump out of the + // inner switch with gotos before performing any fake recursion. + path: + switch (Take()) { + case 'C': goto crate_root; + case 'M': goto inherent_impl; + case 'X': goto trait_impl; + case 'Y': goto trait_definition; + case 'N': goto nested_path; + case 'I': goto generic_args; + case 'B': goto path_backref; + default: return false; + } + + // crate-root -> C identifier (C consumed above) + crate_root: + if (!ParseIdentifier()) return false; + continue; + + // inherent-impl -> M impl-path type (M already consumed) + inherent_impl: + if (!Emit("<")) return false; + ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType); + ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding); + if (!Emit(">")) return false; + continue; + + // trait-impl -> X impl-path type path (X already consumed) + trait_impl: + if (!Emit("<")) return false; + ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType); + ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix); + if (!Emit(" as ")) return false; + ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding); + if (!Emit(">")) return false; + continue; + + // impl-path -> disambiguator? path (but never print it!) + impl_path: + ++silence_depth_; + { + int ignored_disambiguator; + if (!ParseDisambiguator(ignored_disambiguator)) return false; + } + ABSL_DEMANGLER_RECURSE(path, kImplPathEnding); + --silence_depth_; + continue; + + // trait-definition -> Y type path (Y already consumed) + trait_definition: + if (!Emit("<")) return false; + ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix); + if (!Emit(" as ")) return false; + ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding); + if (!Emit(">")) return false; + continue; + + // nested-path -> N namespace path identifier (N already consumed) + // namespace -> lower | upper + nested_path: + // Uppercase namespaces must be saved on a stack so we can print + // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed. + if (IsUpper(Peek())) { + if (!PushNamespace(Take())) return false; + ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace); + if (!Emit("::")) return false; + if (!ParseIdentifier(PopNamespace())) return false; + continue; + } + + // Lowercase namespaces, however, are never represented in the output; + // they all emit just ::name. + if (IsLower(Take())) { + ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace); + if (!Emit("::")) return false; + if (!ParseIdentifier()) return false; + continue; + } + + // Neither upper or lower + return false; + + // type -> basic-type | array-type | slice-type | tuple-type | + // ref-type | mut-ref-type | const-ptr-type | mut-ptr-type | + // fn-type | dyn-trait-type | path | backref + // + // We use ifs instead of switch (Take()) because the default case jumps + // to path, which will need to see the first character not yet Taken + // from the input. Because we do not use a nested switch here, + // ABSL_DEMANGLER_RECURSE works fine in the 'S' case. + type: + if (IsLower(Peek())) { + const char* type_name = BasicTypeName(Take()); + if (type_name == nullptr || !Emit(type_name)) return false; + continue; + } + if (Eat('A')) { + // array-type = A type const + if (!Emit("[")) return false; + ABSL_DEMANGLER_RECURSE(type, kArraySize); + if (!Emit("; ")) return false; + ABSL_DEMANGLER_RECURSE(constant, kFinishArray); + if (!Emit("]")) return false; + continue; + } + if (Eat('S')) { + if (!Emit("[")) return false; + ABSL_DEMANGLER_RECURSE(type, kSliceEnding); + if (!Emit("]")) return false; + continue; + } + if (Eat('T')) goto tuple_type; + if (Eat('R')) { + if (!Emit("&")) return false; + if (!ParseOptionalLifetime()) return false; + goto type; + } + if (Eat('Q')) { + if (!Emit("&mut ")) return false; + if (!ParseOptionalLifetime()) return false; + goto type; + } + if (Eat('P')) { + if (!Emit("*const ")) return false; + goto type; + } + if (Eat('O')) { + if (!Emit("*mut ")) return false; + goto type; + } + if (Eat('F')) goto fn_type; + if (Eat('D')) goto dyn_trait_type; + if (Eat('B')) goto type_backref; + goto path; + + // tuple-type -> T type* E (T already consumed) + tuple_type: + if (!Emit("(")) return false; + + // The toolchain should call the unit type u instead of TE, but the + // grammar and other demanglers also recognize TE, so we do too. + if (Eat('E')) { + if (!Emit(")")) return false; + continue; + } + + // A tuple with one element is rendered (type,) instead of (type). + ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement); + if (Eat('E')) { + if (!Emit(",)")) return false; + continue; + } + + // A tuple with two elements is of course (x, y). + if (!Emit(", ")) return false; + ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement); + if (Eat('E')) { + if (!Emit(")")) return false; + continue; + } + + // And (x, y, z) for three elements. + if (!Emit(", ")) return false; + ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement); + if (Eat('E')) { + if (!Emit(")")) return false; + continue; + } + + // For longer tuples we write (x, y, z, ...), printing none of the + // content of the fourth and later types. Thus we avoid exhausting + // output buffers and human readers' patience when some library has a + // long tuple as an implementation detail, without having to + // completely obfuscate all tuples. + if (!Emit(", ...)")) return false; + ++silence_depth_; + while (!Eat('E')) { + ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement); + } + --silence_depth_; + continue; + + // fn-type -> F fn-sig (F already consumed) + // fn-sig -> binder? U? (K abi)? type* E type + // abi -> C | undisambiguated-identifier + // + // We follow the C++ demangler in suppressing details of function + // signatures. Every function type is rendered "fn...". + fn_type: + if (!Emit("fn...")) return false; + ++silence_depth_; + if (!ParseOptionalBinder()) return false; + (void)Eat('U'); + if (Eat('K')) { + if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false; + } + while (!Eat('E')) { + ABSL_DEMANGLER_RECURSE(type, kContinueParameterList); + } + ABSL_DEMANGLER_RECURSE(type, kFinishFn); + --silence_depth_; + continue; + + // dyn-trait-type -> D dyn-bounds lifetime (D already consumed) + // dyn-bounds -> binder? dyn-trait* E + // + // The grammar strangely allows an empty trait list, even though the + // compiler should never output one. We follow existing demanglers in + // rendering DEL_ as "dyn ". + // + // Because auto traits lengthen a type name considerably without + // providing much value to a search for related source code, it would be + // desirable to abbreviate + // dyn main::Trait + std::marker::Copy + std::marker::Send + // to + // dyn main::Trait + ..., + // eliding the auto traits. But it is difficult to do so correctly, in + // part because there is no guarantee that the mangling will list the + // main trait first. So we just print all the traits in their order of + // appearance in the mangled name. + dyn_trait_type: + if (!Emit("dyn ")) return false; + if (!ParseOptionalBinder()) return false; + if (!Eat('E')) { + ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits); + while (!Eat('E')) { + if (!Emit(" + ")) return false; + ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits); + } + } + if (!ParseRequiredLifetime()) return false; + continue; + + // dyn-trait -> path dyn-trait-assoc-binding* + // dyn-trait-assoc-binding -> p undisambiguated-identifier type + // + // We render nonempty binding lists as <>, omitting their contents as + // for generic-args. + dyn_trait: + ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait); + if (Peek() == 'p') { + if (!Emit("<>")) return false; + ++silence_depth_; + while (Eat('p')) { + if (!ParseUndisambiguatedIdentifier()) return false; + ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding); + } + --silence_depth_; + } + continue; + + // const -> type const-data | p | backref + // + // const is a C++ keyword, so we use the label `constant` instead. + constant: + if (Eat('B')) goto const_backref; + if (Eat('p')) { + if (!Emit("_")) return false; + continue; + } + + // Scan the type without printing it. + // + // The Rust language restricts the type of a const generic argument + // much more than the mangling grammar does. We do not enforce this. + // + // We also do not bother printing false, true, 'A', and '\u{abcd}' for + // the types bool and char. Because we do not print generic-args + // contents, we expect to print constants only in array sizes, and + // those should not be bool or char. + ++silence_depth_; + ABSL_DEMANGLER_RECURSE(type, kConstData); + --silence_depth_; + + // const-data -> n? hex-digit* _ + // + // Although the grammar doesn't say this, existing demanglers expect + // that zero is 0, not an empty digit sequence, and no nonzero value + // may have leading zero digits. Also n0_ is accepted and printed as + // -0, though a toolchain will probably never write that encoding. + if (Eat('n') && !EmitChar('-')) return false; + if (!Emit("0x")) return false; + if (Eat('0')) { + if (!EmitChar('0')) return false; + if (!Eat('_')) return false; + continue; + } + while (IsLowerHexDigit(Peek())) { + if (!EmitChar(Take())) return false; + } + if (!Eat('_')) return false; + continue; + + // generic-args -> I path generic-arg* E (I already consumed) + // + // We follow the C++ demangler in omitting all the arguments from the + // output, printing only the list opening and closing tokens. + generic_args: + ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList); + if (!Emit("::<>")) return false; + ++silence_depth_; + while (!Eat('E')) { + ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList); + } + --silence_depth_; + continue; + + // generic-arg -> lifetime | type | K const + generic_arg: + if (Peek() == 'L') { + if (!ParseOptionalLifetime()) return false; + continue; + } + if (Eat('K')) goto constant; + goto type; + + // backref -> B base-62-number (B already consumed) + // + // The BeginBackref call parses and range-checks the base-62-number. We + // always do that much. + // + // The recursive call parses and prints what the backref points at. We + // save CPU and stack by skipping this work if the output would be + // suppressed anyway. + path_backref: + if (!BeginBackref()) return false; + if (silence_depth_ == 0) { + ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding); + } + EndBackref(); + continue; + + // This represents the same backref production as in path_backref but + // parses the target as a type instead of a path. + type_backref: + if (!BeginBackref()) return false; + if (silence_depth_ == 0) { + ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding); + } + EndBackref(); + continue; + + const_backref: + if (!BeginBackref()) return false; + if (silence_depth_ == 0) { + ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding); + } + EndBackref(); + continue; + } + } + + return false; // hit iteration limit or a bug in our stack handling + } + + private: + // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls. + enum ReturnAddress : uint8_t { + kInstantiatingCrate, + kVendorSpecificSuffix, + kIdentifierInUppercaseNamespace, + kIdentifierInLowercaseNamespace, + kInherentImplType, + kInherentImplEnding, + kTraitImplType, + kTraitImplInfix, + kTraitImplEnding, + kImplPathEnding, + kTraitDefinitionInfix, + kTraitDefinitionEnding, + kArraySize, + kFinishArray, + kSliceEnding, + kAfterFirstTupleElement, + kAfterSecondTupleElement, + kAfterThirdTupleElement, + kAfterSubsequentTupleElement, + kContinueParameterList, + kFinishFn, + kBeginAutoTraits, + kContinueAutoTraits, + kContinueDynTrait, + kContinueAssocBinding, + kConstData, + kBeginGenericArgList, + kContinueGenericArgList, + kPathBackrefEnding, + kTypeBackrefEnding, + kConstantBackrefEnding, + }; + + // Element counts for the stack arrays. Larger stack sizes accommodate more + // deeply nested names at the cost of a larger footprint on the C++ call + // stack. + enum { + // Maximum recursive calls outstanding at one time. + kStackSize = 256, + + // Maximum N<uppercase> nested-paths open at once. We do not expect + // closures inside closures inside closures as much as functions inside + // modules inside other modules, so we can use a smaller array here. + kNamespaceStackSize = 64, + + // Maximum number of nested backrefs. We can keep this stack pretty small + // because we do not follow backrefs inside generic-args or other contexts + // that suppress printing, so deep stacking is unlikely in practice. + kPositionStackSize = 16, + }; + + // Returns the next input character without consuming it. + char Peek() const { return encoding_[pos_]; } + + // Consumes and returns the next input character. + char Take() { return encoding_[pos_++]; } + + // If the next input character is the given character, consumes it and returns + // true; otherwise returns false without consuming a character. + ABSL_MUST_USE_RESULT bool Eat(char want) { + if (encoding_[pos_] != want) return false; + ++pos_; + return true; + } + + // Provided there is enough remaining output space, appends c to the output, + // writing a fresh NUL terminator afterward, and returns true. Returns false + // if the output buffer had less than two bytes free. + ABSL_MUST_USE_RESULT bool EmitChar(char c) { + if (silence_depth_ > 0) return true; + if (out_end_ - out_ < 2) return false; + *out_++ = c; + *out_ = '\0'; + return true; + } + + // Provided there is enough remaining output space, appends the C string token + // to the output, followed by a NUL character, and returns true. Returns + // false if not everything fit into the output buffer. + ABSL_MUST_USE_RESULT bool Emit(const char* token) { + if (silence_depth_ > 0) return true; + const size_t token_length = std::strlen(token); + const size_t bytes_to_copy = token_length + 1; // token and final NUL + if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false; + std::memcpy(out_, token, bytes_to_copy); + out_ += token_length; + return true; + } + + // Provided there is enough remaining output space, appends the decimal form + // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the + // output, followed by a NUL character, and returns true. Returns false if + // not everything fit into the output buffer. + ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) { + if (disambiguator < 0) return EmitChar('?'); // parsed but too large + if (disambiguator == 0) return EmitChar('0'); + // Convert disambiguator to decimal text. Three digits per byte is enough + // because 999 > 256. The bound will remain correct even if future + // maintenance changes the type of the disambiguator variable. + char digits[3 * sizeof(disambiguator)] = {}; + size_t leading_digit_index = sizeof(digits) - 1; + for (; disambiguator > 0; disambiguator /= 10) { + digits[--leading_digit_index] = + static_cast<char>('0' + disambiguator % 10); + } + return Emit(digits + leading_digit_index); + } + + // Consumes an optional disambiguator (s123_) from the input. + // + // On success returns true and fills value with the encoded value if it was + // not too big, otherwise with -1. If the optional disambiguator was omitted, + // value is 0. On parse failure returns false and sets value to -1. + ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) { + value = -1; + + // disambiguator = s base-62-number + // + // Disambiguators are optional. An omitted disambiguator is zero. + if (!Eat('s')) { + value = 0; + return true; + } + int base_62_value = 0; + if (!ParseBase62Number(base_62_value)) return false; + value = base_62_value < 0 ? -1 : base_62_value + 1; + return true; + } + + // Consumes a base-62 number like _ or 123_ from the input. + // + // On success returns true and fills value with the encoded value if it was + // not too big, otherwise with -1. On parse failure returns false and sets + // value to -1. + ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) { + value = -1; + + // base-62-number = (digit | lower | upper)* _ + // + // An empty base-62 digit sequence means 0. + if (Eat('_')) { + value = 0; + return true; + } + + // A nonempty digit sequence denotes its base-62 value plus 1. + int encoded_number = 0; + bool overflowed = false; + while (IsAlpha(Peek()) || IsDigit(Peek())) { + const char c = Take(); + if (encoded_number >= std::numeric_limits<int>::max()/62) { + // If we are close to overflowing an int, keep parsing but stop updating + // encoded_number and remember to return -1 at the end. The point is to + // avoid undefined behavior while parsing crate-root disambiguators, + // which are large in practice but not shown in demangling, while + // successfully computing closure and shim disambiguators, which are + // typically small and are printed out. + overflowed = true; + } else { + int digit; + if (IsDigit(c)) { + digit = c - '0'; + } else if (IsLower(c)) { + digit = c - 'a' + 10; + } else { + digit = c - 'A' + 36; + } + encoded_number = 62 * encoded_number + digit; + } + } + + if (!Eat('_')) return false; + if (!overflowed) value = encoded_number + 1; + return true; + } + + // Consumes an identifier from the input, returning true on success. + // + // A nonzero uppercase_namespace specifies the character after the N in a + // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to + // write out the name with the conventional decoration for that namespace. + ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') { + // identifier -> disambiguator? undisambiguated-identifier + int disambiguator = 0; + if (!ParseDisambiguator(disambiguator)) return false; + + return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator); + } + + // Consumes from the input an identifier with no preceding disambiguator, + // returning true on success. + // + // When ParseIdentifier calls this, it passes the N<namespace> character and + // disambiguator value so that "{closure#42}" and similar forms can be + // rendered correctly. + // + // At other appearances of undisambiguated-identifier in the grammar, this + // treatment is not applicable, and the call site omits both arguments. + ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier( + char uppercase_namespace = '\0', int disambiguator = 0) { + // undisambiguated-identifier -> u? decimal-number _? bytes + const bool is_punycoded = Eat('u'); + if (!IsDigit(Peek())) return false; + int num_bytes = 0; + if (!ParseDecimalNumber(num_bytes)) return false; + (void)Eat('_'); // optional separator, needed if a digit follows + if (is_punycoded) { + DecodeRustPunycodeOptions options; + options.punycode_begin = &encoding_[pos_]; + options.punycode_end = &encoding_[pos_] + num_bytes; + options.out_begin = out_; + options.out_end = out_end_; + out_ = DecodeRustPunycode(options); + if (out_ == nullptr) return false; + pos_ += static_cast<size_t>(num_bytes); + } + + // Emit the beginnings of braced forms like {shim:vtable#0}. + if (uppercase_namespace != '\0') { + switch (uppercase_namespace) { + case 'C': + if (!Emit("{closure")) return false; + break; + case 'S': + if (!Emit("{shim")) return false; + break; + default: + if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false; + break; + } + if (num_bytes > 0 && !Emit(":")) return false; + } + + // Emit the name itself. + if (!is_punycoded) { + for (int i = 0; i < num_bytes; ++i) { + const char c = Take(); + if (!IsIdentifierChar(c) && + // The spec gives toolchains the choice of Punycode or raw UTF-8 for + // identifiers containing code points above 0x7f, so accept bytes + // with the high bit set. + (c & 0x80) == 0) { + return false; + } + if (!EmitChar(c)) return false; + } + } + + // Emit the endings of braced forms, e.g., "#42}". + if (uppercase_namespace != '\0') { + if (!EmitChar('#')) return false; + if (!EmitDisambiguator(disambiguator)) return false; + if (!EmitChar('}')) return false; + } + + return true; + } + + // Consumes a decimal number like 0 or 123 from the input. On success returns + // true and fills value with the encoded value. If the encoded value is too + // large or otherwise unparsable, returns false and sets value to -1. + ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) { + value = -1; + if (!IsDigit(Peek())) return false; + int encoded_number = Take() - '0'; + if (encoded_number == 0) { + // Decimal numbers are never encoded with extra leading zeroes. + value = 0; + return true; + } + while (IsDigit(Peek()) && + // avoid overflow + encoded_number < std::numeric_limits<int>::max()/10) { + encoded_number = 10 * encoded_number + (Take() - '0'); + } + if (IsDigit(Peek())) return false; // too big + value = encoded_number; + return true; + } + + // Consumes a binder of higher-ranked lifetimes if one is present. On success + // returns true and discards the encoded lifetime count. On parse failure + // returns false. + ABSL_MUST_USE_RESULT bool ParseOptionalBinder() { + // binder -> G base-62-number + if (!Eat('G')) return true; + int ignored_binding_count; + return ParseBase62Number(ignored_binding_count); + } + + // Consumes a lifetime if one is present. + // + // On success returns true and discards the lifetime index. We do not print + // or even range-check lifetimes because they are a finer detail than other + // things we omit from output, such as the entire contents of generic-args. + // + // On parse failure returns false. + ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() { + // lifetime -> L base-62-number + if (!Eat('L')) return true; + int ignored_de_bruijn_index; + return ParseBase62Number(ignored_de_bruijn_index); + } + + // Consumes a lifetime just like ParseOptionalLifetime, but returns false if + // there is no lifetime here. + ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() { + if (Peek() != 'L') return false; + return ParseOptionalLifetime(); + } + + // Pushes ns onto the namespace stack and returns true if the stack is not + // full, else returns false. + ABSL_MUST_USE_RESULT bool PushNamespace(char ns) { + if (namespace_depth_ == kNamespaceStackSize) return false; + namespace_stack_[namespace_depth_++] = ns; + return true; + } + + // Pops the last pushed namespace. Requires that the namespace stack is not + // empty (namespace_depth_ > 0). + char PopNamespace() { return namespace_stack_[--namespace_depth_]; } + + // Pushes position onto the position stack and returns true if the stack is + // not full, else returns false. + ABSL_MUST_USE_RESULT bool PushPosition(int position) { + if (position_depth_ == kPositionStackSize) return false; + position_stack_[position_depth_++] = position; + return true; + } + + // Pops the last pushed input position. Requires that the position stack is + // not empty (position_depth_ > 0). + int PopPosition() { return position_stack_[--position_depth_]; } + + // Consumes a base-62-number denoting a backref target, pushes the current + // input position on the data stack, and sets the input position to the + // beginning of the backref target. Returns true on success. Returns false + // if parsing failed, the stack is exhausted, or the backref target position + // is out of range. + ABSL_MUST_USE_RESULT bool BeginBackref() { + // backref = B base-62-number (B already consumed) + // + // Reject backrefs that don't parse, overflow int, or don't point backward. + // If the offset looks fine, adjust it to account for the _R prefix. + int offset = 0; + const int offset_of_this_backref = + pos_ - 2 /* _R */ - 1 /* B already consumed */; + if (!ParseBase62Number(offset) || offset < 0 || + offset >= offset_of_this_backref) { + return false; + } + offset += 2; + + // Save the old position to restore later. + if (!PushPosition(pos_)) return false; + + // Move the input position to the backref target. + // + // Note that we do not check whether the new position points to the + // beginning of a construct matching the context in which the backref + // appeared. We just jump to it and see whether nested parsing succeeds. + // We therefore accept various wrong manglings, e.g., a type backref + // pointing to an 'l' character inside an identifier, which happens to mean + // i32 when parsed as a type mangling. This saves the complexity and RAM + // footprint of remembering which offsets began which kinds of + // substructures. Existing demanglers take similar shortcuts. + pos_ = offset; + return true; + } + + // Cleans up after a backref production by restoring the previous input + // position from the data stack. + void EndBackref() { pos_ = PopPosition(); } + + // The leftmost recursion_depth_ elements of recursion_stack_ contain the + // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed. + ReturnAddress recursion_stack_[kStackSize] = {}; + int recursion_depth_ = 0; + + // The leftmost namespace_depth_ elements of namespace_stack_ contain the + // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a + // closure. + char namespace_stack_[kNamespaceStackSize] = {}; + int namespace_depth_ = 0; + + // The leftmost position_depth_ elements of position_stack_ contain the input + // positions to return to after fully printing the targets of backrefs. + int position_stack_[kPositionStackSize] = {}; + int position_depth_ = 0; + + // Anything parsed while silence_depth_ > 0 contributes nothing to the + // demangled output. For constructs omitted from the demangling, such as + // impl-path and the contents of generic-args, we will increment + // silence_depth_ on the way in and decrement silence_depth_ on the way out. + int silence_depth_ = 0; + + // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is + // the next input character to be scanned. + int pos_ = 0; + const char* encoding_ = nullptr; + + // Output: *out_ is where the next output character should be written, and + // out_end_ points past the last byte of available space. + char* out_ = nullptr; + char* out_end_ = nullptr; +}; + +} // namespace + +bool DemangleRustSymbolEncoding(const char* mangled, char* out, + size_t out_size) { + return RustSymbolParser(mangled, out, out + out_size).Parse(); +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/demangle_rust.h b/absl/debugging/internal/demangle_rust.h new file mode 100644 index 00000000..94a9aecb --- /dev/null +++ b/absl/debugging/internal/demangle_rust.h @@ -0,0 +1,42 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_ +#define ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_ + +#include <cstddef> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Demangle the Rust encoding `mangled`. On success, return true and write the +// demangled symbol name to `out`. Otherwise, return false, leaving unspecified +// contents in `out`. For example, calling DemangleRustSymbolEncoding with +// `mangled = "_RNvC8my_crate7my_func"` will yield `my_crate::my_func` in `out`, +// provided `out_size` is large enough for that value and its trailing NUL. +// +// DemangleRustSymbolEncoding is async-signal-safe and runs in bounded C++ +// call-stack space. It is suitable for symbolizing stack traces in a signal +// handler. +bool DemangleRustSymbolEncoding(const char* mangled, char* out, + size_t out_size); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_ diff --git a/absl/debugging/internal/demangle_rust_test.cc b/absl/debugging/internal/demangle_rust_test.cc new file mode 100644 index 00000000..8ceb1fd7 --- /dev/null +++ b/absl/debugging/internal/demangle_rust_test.cc @@ -0,0 +1,584 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/demangle_rust.h" + +#include <cstddef> +#include <string> + +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +// If DemangleRustSymbolEncoding(mangled, <buffer with room for buffer_size +// chars>, buffer_size) returns true and seems not to have overrun its output +// buffer, returns the string written by DemangleRustSymbolEncoding; otherwise +// returns an error message. +std::string ResultOfDemangling(const char* mangled, size_t buffer_size) { + // Fill the buffer with something other than NUL so we test whether Demangle + // appends trailing NUL as expected. + std::string buffer(buffer_size + 1, '~'); + constexpr char kCanaryCharacter = 0x7f; // arbitrary unlikely value + buffer[buffer_size] = kCanaryCharacter; + if (!DemangleRustSymbolEncoding(mangled, &buffer[0], buffer_size)) { + return "Failed parse"; + } + if (buffer[buffer_size] != kCanaryCharacter) { + return "Buffer overrun by output: " + buffer.substr(0, buffer_size + 1) + + "..."; + } + return buffer.data(); // Not buffer itself: this trims trailing padding. +} + +// Tests that DemangleRustSymbolEncoding converts mangled into plaintext given +// enough output buffer space but returns false and avoids overrunning a buffer +// that is one byte too short. +// +// The lambda wrapping allows ASSERT_EQ to branch out the first time an +// expectation is not satisfied, preventing redundant errors for the same bug. +// +// We test first with excess space so that if the algorithm just computes the +// wrong answer, it will be clear from the error log that the bounds checks are +// unlikely to be the code at fault. +#define EXPECT_DEMANGLING(mangled, plaintext) \ + do { \ + [] { \ + constexpr size_t plenty_of_space = sizeof(plaintext) + 128; \ + constexpr size_t just_enough_space = sizeof(plaintext); \ + constexpr size_t one_byte_too_few = sizeof(plaintext) - 1; \ + const char* expected_plaintext = plaintext; \ + const char* expected_error = "Failed parse"; \ + ASSERT_EQ(ResultOfDemangling(mangled, plenty_of_space), \ + expected_plaintext); \ + ASSERT_EQ(ResultOfDemangling(mangled, just_enough_space), \ + expected_plaintext); \ + ASSERT_EQ(ResultOfDemangling(mangled, one_byte_too_few), \ + expected_error); \ + }(); \ + } while (0) + +// Tests that DemangleRustSymbolEncoding rejects the given input (typically, a +// truncation of a real Rust symbol name). +#define EXPECT_DEMANGLING_FAILS(mangled) \ + do { \ + constexpr size_t plenty_of_space = 1024; \ + const char* expected_error = "Failed parse"; \ + EXPECT_EQ(ResultOfDemangling(mangled, plenty_of_space), expected_error); \ + } while (0) + +// Piping grep -C 1 _R demangle_test.cc into your favorite c++filt +// implementation allows you to verify that the goldens below are reasonable. + +TEST(DemangleRust, EmptyDemangling) { + EXPECT_TRUE(DemangleRustSymbolEncoding("_RC0", nullptr, 0)); +} + +TEST(DemangleRust, FunctionAtCrateLevel) { + EXPECT_DEMANGLING("_RNvC10crate_name9func_name", "crate_name::func_name"); + EXPECT_DEMANGLING( + "_RNvCs09azAZ_10crate_name9func_name", "crate_name::func_name"); +} + +TEST(DemangleRust, TruncationsOfFunctionAtCrateLevel) { + EXPECT_DEMANGLING_FAILS("_R"); + EXPECT_DEMANGLING_FAILS("_RN"); + EXPECT_DEMANGLING_FAILS("_RNvC"); + EXPECT_DEMANGLING_FAILS("_RNvC10"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_nam"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_name"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9func_nam"); + EXPECT_DEMANGLING_FAILS("_RNvCs"); + EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ"); + EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ_"); +} + +TEST(DemangleRust, VendorSuffixes) { + EXPECT_DEMANGLING("_RNvC10crate_name9func_name.!@#", "crate_name::func_name"); + EXPECT_DEMANGLING("_RNvC10crate_name9func_name$!@#", "crate_name::func_name"); +} + +TEST(DemangleRust, UnicodeIdentifiers) { + EXPECT_DEMANGLING("_RNvC7ice_cap17Eyjafjallajökull", + "ice_cap::Eyjafjallajökull"); + EXPECT_DEMANGLING("_RNvC7ice_caps_u19Eyjafjallajkull_jtb", + "ice_cap::Eyjafjallajökull"); +} + +TEST(DemangleRust, FunctionInModule) { + EXPECT_DEMANGLING("_RNvNtCs09azAZ_10crate_name11module_name9func_name", + "crate_name::module_name::func_name"); +} + +TEST(DemangleRust, FunctionInFunction) { + EXPECT_DEMANGLING( + "_RNvNvCs09azAZ_10crate_name15outer_func_name15inner_func_name", + "crate_name::outer_func_name::inner_func_name"); +} + +TEST(DemangleRust, ClosureInFunction) { + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_name0", + "crate_name::func_name::{closure#0}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_name0Cs123_12client_crate", + "crate_name::func_name::{closure#0}"); +} + +TEST(DemangleRust, ClosureNumbering) { + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names_0Cs123_12client_crate", + "crate_name::func_name::{closure#1}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names0_0Cs123_12client_crate", + "crate_name::func_name::{closure#2}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names9_0Cs123_12client_crate", + "crate_name::func_name::{closure#11}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesa_0Cs123_12client_crate", + "crate_name::func_name::{closure#12}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesz_0Cs123_12client_crate", + "crate_name::func_name::{closure#37}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesA_0Cs123_12client_crate", + "crate_name::func_name::{closure#38}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesZ_0Cs123_12client_crate", + "crate_name::func_name::{closure#63}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names10_0Cs123_12client_crate", + "crate_name::func_name::{closure#64}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesg6_0Cs123_12client_crate", + "crate_name::func_name::{closure#1000}"); +} + +TEST(DemangleRust, ClosureNumberOverflowingInt) { + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names1234567_0Cs123_12client_crate", + "crate_name::func_name::{closure#?}"); +} + +TEST(DemangleRust, UnexpectedlyNamedClosure) { + EXPECT_DEMANGLING( + "_RNCNvCs123_10crate_name9func_name12closure_nameCs456_12client_crate", + "crate_name::func_name::{closure:closure_name#0}"); + EXPECT_DEMANGLING( + "_RNCNvCs123_10crate_name9func_names2_12closure_nameCs456_12client_crate", + "crate_name::func_name::{closure:closure_name#4}"); +} + +TEST(DemangleRust, ItemNestedInsideClosure) { + EXPECT_DEMANGLING( + "_RNvNCNvCs123_10crate_name9func_name015inner_func_nameCs_12client_crate", + "crate_name::func_name::{closure#0}::inner_func_name"); +} + +TEST(DemangleRust, Shim) { + EXPECT_DEMANGLING( + "_RNSNvCs123_10crate_name9func_name6vtableCs456_12client_crate", + "crate_name::func_name::{shim:vtable#0}"); +} + +TEST(DemangleRust, UnknownUppercaseNamespace) { + EXPECT_DEMANGLING( + "_RNXNvCs123_10crate_name9func_name14mystery_objectCs456_12client_crate", + "crate_name::func_name::{X:mystery_object#0}"); +} + +TEST(DemangleRust, NestedUppercaseNamespaces) { + EXPECT_DEMANGLING( + "_RNCNXNYCs123_10crate_names0_1ys1_1xs2_0Cs456_12client_crate", + "crate_name::{Y:y#2}::{X:x#3}::{closure#4}"); +} + +TEST(DemangleRust, TraitDefinition) { + EXPECT_DEMANGLING( + "_RNvYNtC7crate_a9my_structNtC7crate_b8my_trait1f", + "<crate_a::my_struct as crate_b::my_trait>::f"); +} + +TEST(DemangleRust, BasicTypeNames) { + EXPECT_DEMANGLING("_RNvYaNtC1c1t1f", "<i8 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYbNtC1c1t1f", "<bool as c::t>::f"); + EXPECT_DEMANGLING("_RNvYcNtC1c1t1f", "<char as c::t>::f"); + EXPECT_DEMANGLING("_RNvYdNtC1c1t1f", "<f64 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYeNtC1c1t1f", "<str as c::t>::f"); + EXPECT_DEMANGLING("_RNvYfNtC1c1t1f", "<f32 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYhNtC1c1t1f", "<u8 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYiNtC1c1t1f", "<isize as c::t>::f"); + EXPECT_DEMANGLING("_RNvYjNtC1c1t1f", "<usize as c::t>::f"); + EXPECT_DEMANGLING("_RNvYlNtC1c1t1f", "<i32 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYmNtC1c1t1f", "<u32 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYnNtC1c1t1f", "<i128 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYoNtC1c1t1f", "<u128 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYpNtC1c1t1f", "<_ as c::t>::f"); + EXPECT_DEMANGLING("_RNvYsNtC1c1t1f", "<i16 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYtNtC1c1t1f", "<u16 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYuNtC1c1t1f", "<() as c::t>::f"); + EXPECT_DEMANGLING("_RNvYvNtC1c1t1f", "<... as c::t>::f"); + EXPECT_DEMANGLING("_RNvYxNtC1c1t1f", "<i64 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYyNtC1c1t1f", "<u64 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYzNtC1c1t1f", "<! as c::t>::f"); + + EXPECT_DEMANGLING_FAILS("_RNvYkNtC1c1t1f"); +} + +TEST(DemangleRust, SliceTypes) { + EXPECT_DEMANGLING("_RNvYSlNtC1c1t1f", "<[i32] as c::t>::f"); + EXPECT_DEMANGLING("_RNvYSNtC1d1sNtC1c1t1f", "<[d::s] as c::t>::f"); +} + +TEST(DemangleRust, ImmutableReferenceTypes) { + EXPECT_DEMANGLING("_RNvYRlNtC1c1t1f", "<&i32 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYRNtC1d1sNtC1c1t1f", "<&d::s as c::t>::f"); +} + +TEST(DemangleRust, MutableReferenceTypes) { + EXPECT_DEMANGLING("_RNvYQlNtC1c1t1f", "<&mut i32 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYQNtC1d1sNtC1c1t1f", "<&mut d::s as c::t>::f"); +} + +TEST(DemangleRust, ConstantRawPointerTypes) { + EXPECT_DEMANGLING("_RNvYPlNtC1c1t1f", "<*const i32 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYPNtC1d1sNtC1c1t1f", "<*const d::s as c::t>::f"); +} + +TEST(DemangleRust, MutableRawPointerTypes) { + EXPECT_DEMANGLING("_RNvYOlNtC1c1t1f", "<*mut i32 as c::t>::f"); + EXPECT_DEMANGLING("_RNvYONtC1d1sNtC1c1t1f", "<*mut d::s as c::t>::f"); +} + +TEST(DemangleRust, TupleLength0) { + EXPECT_DEMANGLING("_RNvYTENtC1c1t1f", "<() as c::t>::f"); +} + +TEST(DemangleRust, TupleLength1) { + EXPECT_DEMANGLING("_RNvYTlENtC1c1t1f", "<(i32,) as c::t>::f"); + EXPECT_DEMANGLING("_RNvYTNtC1d1sENtC1c1t1f", "<(d::s,) as c::t>::f"); +} + +TEST(DemangleRust, TupleLength2) { + EXPECT_DEMANGLING("_RNvYTlmENtC1c1t1f", "<(i32, u32) as c::t>::f"); + EXPECT_DEMANGLING("_RNvYTNtC1d1xNtC1e1yENtC1c1t1f", + "<(d::x, e::y) as c::t>::f"); +} + +TEST(DemangleRust, TupleLength3) { + EXPECT_DEMANGLING("_RNvYTlmnENtC1c1t1f", "<(i32, u32, i128) as c::t>::f"); + EXPECT_DEMANGLING("_RNvYTNtC1d1xNtC1e1yNtC1f1zENtC1c1t1f", + "<(d::x, e::y, f::z) as c::t>::f"); +} + +TEST(DemangleRust, LongerTuplesAbbreviated) { + EXPECT_DEMANGLING("_RNvYTlmnoENtC1c1t1f", + "<(i32, u32, i128, ...) as c::t>::f"); + EXPECT_DEMANGLING("_RNvYTlmnNtC1d1xNtC1e1yENtC1c1t1f", + "<(i32, u32, i128, ...) as c::t>::f"); +} + +TEST(DemangleRust, PathBackrefToCrate) { + EXPECT_DEMANGLING("_RNvYNtC8my_crate9my_structNtB4_8my_trait1f", + "<my_crate::my_struct as my_crate::my_trait>::f"); +} + +TEST(DemangleRust, PathBackrefToNestedPath) { + EXPECT_DEMANGLING("_RNvYNtNtC1c1m1sNtB4_1t1f", "<c::m::s as c::m::t>::f"); +} + +TEST(DemangleRust, PathBackrefAsInstantiatingCrate) { + EXPECT_DEMANGLING("_RNCNvC8my_crate7my_func0B3_", + "my_crate::my_func::{closure#0}"); +} + +TEST(DemangleRust, TypeBackrefsNestedInTuple) { + EXPECT_DEMANGLING("_RNvYTTRlB4_ERB3_ENtC1c1t1f", + "<((&i32, &i32), &(&i32, &i32)) as c::t>::f"); +} + +TEST(DemangleRust, NoInfiniteLoopOnBackrefToTheWhole) { + EXPECT_DEMANGLING_FAILS("_RB_"); + EXPECT_DEMANGLING_FAILS("_RNvB_1sNtC1c1t1f"); +} + +TEST(DemangleRust, NoCrashOnForwardBackref) { + EXPECT_DEMANGLING_FAILS("_RB0_"); + EXPECT_DEMANGLING_FAILS("_RB1_"); + EXPECT_DEMANGLING_FAILS("_RB2_"); + EXPECT_DEMANGLING_FAILS("_RB3_"); + EXPECT_DEMANGLING_FAILS("_RB4_"); +} + +TEST(DemangleRust, PathBackrefsDoNotRecurseDuringSilence) { + // B_ points at the value f (the whole mangling), so the cycle would lead to + // parse failure if the parser tried to parse what was pointed to. + EXPECT_DEMANGLING("_RNvYTlmnNtB_1sENtC1c1t1f", + "<(i32, u32, i128, ...) as c::t>::f"); +} + +TEST(DemangleRust, TypeBackrefsDoNotRecurseDuringSilence) { + // B2_ points at the tuple type, likewise making a cycle that the parser + // avoids following. + EXPECT_DEMANGLING("_RNvYTlmnB2_ENtC1c1t1f", + "<(i32, u32, i128, ...) as c::t>::f"); +} + +TEST(DemangleRust, ConstBackrefsDoNotRecurseDuringSilence) { + // B_ points at the whole I...E mangling, which does not parse as a const. + EXPECT_DEMANGLING("_RINvC1c1fAlB_E", "c::f::<>"); +} + +TEST(DemangleRust, ReturnFromBackrefToInputPosition256) { + // Show that we can resume at input positions that don't fit into a byte. + EXPECT_DEMANGLING("_RNvYNtC1c238very_long_type_" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABC" + "NtB4_1t1f", + "<c::very_long_type_" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij" + "ABCDEFGHIJabcdefghijABC" + " as c::t>::f"); +} + +TEST(DemangleRust, EmptyGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fE", "c::f::<>"); +} + +TEST(DemangleRust, OneSimpleTypeInGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1flE", // c::f::<i32> + "c::f::<>"); +} + +TEST(DemangleRust, OneTupleInGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fTlmEE", // c::f::<(i32, u32)> + "c::f::<>"); +} + +TEST(DemangleRust, OnePathInGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fNtC1d1sE", // c::f::<d::s> + "c::f::<>"); +} + +TEST(DemangleRust, LongerGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1flmRNtC1d1sE", // c::f::<i32, u32, &d::s> + "c::f::<>"); +} + +TEST(DemangleRust, BackrefInGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fRlB7_NtB2_1sE", // c::f::<&i32, &i32, c::s> + "c::f::<>"); +} + +TEST(DemangleRust, NestedGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fINtB2_1slEmE", // c::f::<c::s::<i32>, u32> + "c::f::<>"); +} + +TEST(DemangleRust, MonomorphicEntityNestedInsideGeneric) { + EXPECT_DEMANGLING("_RNvINvC1c1fppE1g", // c::f::<_, _>::g + "c::f::<>::g"); +} + +TEST(DemangleRust, ArrayTypeWithSimpleElementType) { + EXPECT_DEMANGLING("_RNvYAlj1f_NtC1c1t1f", "<[i32; 0x1f] as c::t>::f"); +} + +TEST(DemangleRust, ArrayTypeWithComplexElementType) { + EXPECT_DEMANGLING("_RNvYAINtC1c1slEj1f_NtB6_1t1f", + "<[c::s::<>; 0x1f] as c::t>::f"); +} + +TEST(DemangleRust, NestedArrayType) { + EXPECT_DEMANGLING("_RNvYAAlj1f_j2e_NtC1c1t1f", + "<[[i32; 0x1f]; 0x2e] as c::t>::f"); +} + +TEST(DemangleRust, BackrefArraySize) { + EXPECT_DEMANGLING("_RNvYAAlj1f_B5_NtC1c1t1f", + "<[[i32; 0x1f]; 0x1f] as c::t>::f"); +} + +TEST(DemangleRust, ZeroArraySize) { + EXPECT_DEMANGLING("_RNvYAlj0_NtC1c1t1f", "<[i32; 0x0] as c::t>::f"); +} + +TEST(DemangleRust, SurprisingMinusesInArraySize) { + // Compilers shouldn't do this stuff, but existing demanglers accept it. + EXPECT_DEMANGLING("_RNvYAljn0_NtC1c1t1f", "<[i32; -0x0] as c::t>::f"); + EXPECT_DEMANGLING("_RNvYAljn42_NtC1c1t1f", "<[i32; -0x42] as c::t>::f"); +} + +TEST(DemangleRust, NumberAsGenericArg) { + EXPECT_DEMANGLING("_RINvC1c1fKl8_E", // c::f::<0x8> + "c::f::<>"); +} + +TEST(DemangleRust, NumberAsFirstOfTwoGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fKl8_mE", // c::f::<0x8, u32> + "c::f::<>"); +} + +TEST(DemangleRust, NumberAsSecondOfTwoGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fmKl8_E", // c::f::<u32, 0x8> + "c::f::<>"); +} + +TEST(DemangleRust, NumberPlaceholder) { + EXPECT_DEMANGLING("_RNvINvC1c1fKpE1g", // c::f::<_>::g + "c::f::<>::g"); +} + +TEST(DemangleRust, InherentImplWithoutDisambiguator) { + EXPECT_DEMANGLING("_RNvMNtC8my_crate6my_modNtB2_9my_struct7my_func", + "<my_crate::my_mod::my_struct>::my_func"); +} + +TEST(DemangleRust, InherentImplWithDisambiguator) { + EXPECT_DEMANGLING("_RNvMs_NtC8my_crate6my_modNtB4_9my_struct7my_func", + "<my_crate::my_mod::my_struct>::my_func"); +} + +TEST(DemangleRust, TraitImplWithoutDisambiguator) { + EXPECT_DEMANGLING("_RNvXC8my_crateNtB2_9my_structNtB2_8my_trait7my_func", + "<my_crate::my_struct as my_crate::my_trait>::my_func"); +} + +TEST(DemangleRust, TraitImplWithDisambiguator) { + EXPECT_DEMANGLING("_RNvXs_C8my_crateNtB4_9my_structNtB4_8my_trait7my_func", + "<my_crate::my_struct as my_crate::my_trait>::my_func"); +} + +TEST(DemangleRust, TraitImplWithNonpathSelfType) { + EXPECT_DEMANGLING("_RNvXC8my_crateRlNtB2_8my_trait7my_func", + "<&i32 as my_crate::my_trait>::my_func"); +} + +TEST(DemangleRust, ThunkType) { + EXPECT_DEMANGLING("_RNvYFEuNtC1c1t1f", // <fn() as c::t>::f + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, NontrivialFunctionReturnType) { + EXPECT_DEMANGLING( + "_RNvYFERTlmENtC1c1t1f", // <fn() -> &(i32, u32) as c::t>::f + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, OneParameterType) { + EXPECT_DEMANGLING("_RNvYFlEuNtC1c1t1f", // <fn(i32) as c::t>::f + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, TwoParameterTypes) { + EXPECT_DEMANGLING("_RNvYFlmEuNtC1c1t1f", // <fn(i32, u32) as c::t>::f + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, ExternC) { + EXPECT_DEMANGLING("_RNvYFKCEuNtC1c1t1f", // <extern "C" fn() as c::t>>::f + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, ExternOther) { + EXPECT_DEMANGLING( + "_RNvYFK5not_CEuNtC1c1t1f", // <extern "not-C" fn() as c::t>::f + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, Unsafe) { + EXPECT_DEMANGLING("_RNvYFUEuNtC1c1t1f", // <unsafe fn() as c::t>::f + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, Binder) { + EXPECT_DEMANGLING( + // <for<'a> fn(&'a i32) -> &'a i32 as c::t>::f + "_RNvYFG_RL0_lEB5_NtC1c1t1f", + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, AllFnSigFeaturesInOrder) { + EXPECT_DEMANGLING( + // <for<'a> unsafe extern "C" fn(&'a i32) -> &'a i32 as c::t>::f + "_RNvYFG_UKCRL0_lEB8_NtC1c1t1f", + "<fn... as c::t>::f"); +} + +TEST(DemangleRust, LifetimeInGenericArgs) { + EXPECT_DEMANGLING("_RINvC1c1fINtB2_1sL_EE", // c::f::<c::s::<'_>> + "c::f::<>"); +} + +TEST(DemangleRust, EmptyDynTrait) { + // This shouldn't happen, but the grammar allows it and existing demanglers + // accept it. + EXPECT_DEMANGLING("_RNvYDEL_NtC1c1t1f", + "<dyn as c::t>::f"); +} + +TEST(DemangleRust, SimpleDynTrait) { + EXPECT_DEMANGLING("_RNvYDNtC1c1tEL_NtC1d1u1f", + "<dyn c::t as d::u>::f"); +} + +TEST(DemangleRust, DynTraitWithOneAssociatedType) { + EXPECT_DEMANGLING( + "_RNvYDNtC1c1tp1xlEL_NtC1d1u1f", // <dyn c::t<x = i32> as d::u>::f + "<dyn c::t<> as d::u>::f"); +} + +TEST(DemangleRust, DynTraitWithTwoAssociatedTypes) { + EXPECT_DEMANGLING( + // <dyn c::t<x = i32, y = u32> as d::u>::f + "_RNvYDNtC1c1tp1xlp1ymEL_NtC1d1u1f", + "<dyn c::t<> as d::u>::f"); +} + +TEST(DemangleRust, DynTraitPlusAutoTrait) { + EXPECT_DEMANGLING( + "_RNvYDNtC1c1tNtNtC3std6marker4SendEL_NtC1d1u1f", + "<dyn c::t + std::marker::Send as d::u>::f"); +} + +TEST(DemangleRust, DynTraitPlusTwoAutoTraits) { + EXPECT_DEMANGLING( + "_RNvYDNtC1c1tNtNtC3std6marker4CopyNtBc_4SyncEL_NtC1d1u1f", + "<dyn c::t + std::marker::Copy + std::marker::Sync as d::u>::f"); +} + +TEST(DemangleRust, HigherRankedDynTrait) { + EXPECT_DEMANGLING( + // <dyn for<'a> c::t::<&'a i32> as d::u>::f + "_RNvYDG_INtC1c1tRL0_lEEL_NtC1d1u1f", + "<dyn c::t::<> as d::u>::f"); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/demangle_test.cc b/absl/debugging/internal/demangle_test.cc index a16ab75e..5579221a 100644 --- a/absl/debugging/internal/demangle_test.cc +++ b/absl/debugging/internal/demangle_test.cc @@ -31,6 +31,477 @@ namespace { using ::testing::ContainsRegex; +TEST(Demangle, FunctionTemplate) { + char tmp[100]; + + // template <typename T> + // int foo(T); + // + // foo<int>(5); + ASSERT_TRUE(Demangle("_Z3fooIiEiT_", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionTemplateWithNesting) { + char tmp[100]; + + // template <typename T> + // int foo(T); + // + // foo<Wrapper<int>>({ .value = 5 }); + ASSERT_TRUE(Demangle("_Z3fooI7WrapperIiEEiT_", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionTemplateWithNonTypeParamConstraint) { + char tmp[100]; + + // template <std::integral T> + // int foo(T); + // + // foo<int>(5); + ASSERT_TRUE(Demangle("_Z3fooITkSt8integraliEiT_", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionTemplateWithFunctionRequiresClause) { + char tmp[100]; + + // template <typename T> + // int foo() requires std::integral<T>; + // + // foo<int>(); + ASSERT_TRUE(Demangle("_Z3fooIiEivQsr3stdE8integralIT_E", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionWithTemplateParamRequiresClause) { + char tmp[100]; + + // template <typename T> + // requires std::integral<T> + // int foo(); + // + // foo<int>(); + ASSERT_TRUE(Demangle("_Z3fooIiQsr3stdE8integralIT_EEiv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionWithTemplateParamAndFunctionRequiresClauses) { + char tmp[100]; + + // template <typename T> + // requires std::integral<T> + // int foo() requires std::integral<T>; + // + // foo<int>(); + ASSERT_TRUE(Demangle("_Z3fooIiQsr3stdE8integralIT_EEivQsr3stdE8integralIS0_E", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionTemplateBacktracksOnMalformedRequiresClause) { + char tmp[100]; + + // template <typename T> + // int foo(T); + // + // foo<int>(5); + // Except there's an extra `Q` where the mangled requires clause would be. + ASSERT_FALSE(Demangle("_Z3fooIiQEiT_", tmp, sizeof(tmp))); +} + +TEST(Demangle, FunctionTemplateWithAutoParam) { + char tmp[100]; + + // template <auto> + // void foo(); + // + // foo<1>(); + ASSERT_TRUE(Demangle("_Z3fooITnDaLi1EEvv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionTemplateWithNonTypeParamPack) { + char tmp[100]; + + // template <int&..., typename T> + // void foo(T); + // + // foo(2); + ASSERT_TRUE(Demangle("_Z3fooITpTnRiJEiEvT0_", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, FunctionTemplateTemplateParamWithConstrainedArg) { + char tmp[100]; + + // template <typename T> + // concept True = true; + // + // template <typename T> requires True<T> + // struct Fooer {}; + // + // template <template <typename T> typename> + // void foo() {} + // + // foo<Fooer>(); + ASSERT_TRUE(Demangle("_Z3fooITtTyE5FooerEvv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, ConstrainedAutoInFunctionTemplate) { + char tmp[100]; + + // template <typename T> concept C = true; + // template <C auto N> void f() {} + // template void f<0>(); + ASSERT_TRUE(Demangle("_Z1fITnDk1CLi0EEvv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "f<>()"); +} + +TEST(Demangle, ConstrainedFriendFunctionTemplate) { + char tmp[100]; + + // Source: + // + // namespace ns { + // template <class T> struct Y { + // friend void y(Y) requires true {} + // }; + // } // namespace ns + // + // y(ns::Y<int>{}); + // + // LLVM demangling: + // + // ns::Y<int>::friend y(ns::Y<int>) requires true + ASSERT_TRUE(Demangle("_ZN2ns1YIiEF1yES1_QLb1E", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "ns::Y<>::friend y()"); +} + +TEST(Demangle, ConstrainedFriendOperatorTemplate) { + char tmp[100]; + + // ns::Y<int>::friend operator*(ns::Y<int>) requires true + ASSERT_TRUE(Demangle("_ZN2ns1YIiEFdeES1_QLb1E", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "ns::Y<>::friend operator*()"); +} + +TEST(Demangle, NonTemplateBuiltinType) { + char tmp[100]; + + // void foo(__my_builtin_type t); + // + // foo({}); + ASSERT_TRUE(Demangle("_Z3foou17__my_builtin_type", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo()"); +} + +TEST(Demangle, SingleArgTemplateBuiltinType) { + char tmp[100]; + + // template <typename T> + // __my_builtin_type<T> foo(); + // + // foo<int>(); + ASSERT_TRUE(Demangle("_Z3fooIiEu17__my_builtin_typeIT_Ev", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, TwoArgTemplateBuiltinType) { + char tmp[100]; + + // template <typename T, typename U> + // __my_builtin_type<T, U> foo(); + // + // foo<int, char>(); + ASSERT_TRUE( + Demangle("_Z3fooIicEu17__my_builtin_typeIT_T0_Ev", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, TypeNestedUnderTemplatedBuiltinType) { + char tmp[100]; + + // Source: + // + // template <typename T> + // typename std::remove_reference_t<T>::type f(T t); + // + // struct C { using type = C; }; + // + // f<const C&>(C{}); + // + // These days std::remove_reference_t is implemented in terms of a vendor + // builtin __remove_reference_t. A full demangling might look like: + // + // __remove_reference_t<C const&>::type f<C const&>(C const&) + ASSERT_TRUE(Demangle("_Z1fIRK1CENu20__remove_reference_tIT_E4typeES3_", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, TemplateTemplateParamSubstitution) { + char tmp[100]; + + // template <typename T> + // concept True = true; + // + // template<std::integral T, T> struct Foolable {}; + // template<template<typename T, T> typename> void foo() {} + // + // template void foo<Foolable>(); + ASSERT_TRUE(Demangle("_Z3fooITtTyTnTL0__E8FoolableEvv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "foo<>()"); +} + +TEST(Demangle, TemplateParamSubstitutionWithGenericLambda) { + char tmp[100]; + + // template <typename> + // struct Fooer { + // template <typename> + // void foo(decltype([](auto x, auto y) {})) {} + // }; + // + // Fooer<int> f; + // f.foo<int>({}); + ASSERT_TRUE( + Demangle("_ZN5FooerIiE3fooIiEEvNS0_UlTL0__TL0_0_E_E", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "Fooer<>::foo<>()"); +} + +TEST(Demangle, LambdaRequiresTrue) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const requires true + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QLb1E", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresSimpleExpression) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const requires 2 + 2 == 4 + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QeqplLi2ELi2ELi4E", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionContainingTrue) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const requires requires { true; } + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXLb1EE", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionContainingConcept) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const + // requires requires { std::same_as<decltype(fp), int>; } + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXsr3stdE7same_asIDtfp_EiEE", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionContainingNoexceptExpression) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const + // requires requires { {fp + fp} noexcept; } + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXplfp_fp_NE", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionContainingReturnTypeConstraint) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const + // requires requires { {fp + fp} -> std::same_as<decltype(fp)>; } + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXplfp_fp_RNSt7same_asIDtfp_EEEE", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionWithBothNoexceptAndReturnType) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const + // requires requires { {fp + fp} noexcept -> std::same_as<decltype(fp)>; } + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXplfp_fp_NRNSt7same_asIDtfp_EEEE", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionContainingType) { + char tmp[100]; + + // auto $_0::operator()<S>(S) const + // requires requires { typename S::T; } + ASSERT_TRUE(Demangle("_ZNK3$_0clI1SEEDaT_QrqTNS2_1TEE", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionNestingAnotherRequires) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const requires requires { requires true; } + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqQLb1EE", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, LambdaRequiresRequiresExpressionContainingTwoRequirements) { + char tmp[100]; + + // auto $_0::operator()<int>(int) const + // requires requires { requires true; requires 2 + 2 == 4; } + ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXLb1EXeqplLi2ELi2ELi4EE", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "$_0::operator()<>()"); +} + +TEST(Demangle, RequiresExpressionWithItsOwnParameter) { + char tmp[100]; + + // S<requires (int) { fp + fp; }> f<int>(int) + ASSERT_TRUE(Demangle("_Z1fIiE1SIXrQT__XplfL0p_fp_EEES1_", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "f<>()"); +} + +TEST(Demangle, LambdaWithExplicitTypeArgument) { + char tmp[100]; + + // Source: + // + // template <class T> T f(T t) { + // return []<class U>(U u) { return u + u; }(t); + // } + // + // template int f<int>(int); + // + // Full LLVM demangling of the lambda call operator: + // + // auto int f<int>(int)::'lambda'<typename $T>(int):: + // operator()<int>(int) const + ASSERT_TRUE(Demangle("_ZZ1fIiET_S0_ENKUlTyS0_E_clIiEEDaS0_", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "f<>()::{lambda()#1}::operator()<>()"); +} + +TEST(Demangle, LambdaWithExplicitPackArgument) { + char tmp[100]; + + // Source: + // + // template <class T> T h(T t) { + // return []<class... U>(U... u) { + // return ((u + u) + ... + 0); + // }(t); + // } + // + // template int h<int>(int); + // + // Full LLVM demangling of the lambda call operator: + // + // auto int f<int>(int)::'lambda'<typename ...$T>($T...):: + // operator()<int>($T...) const + ASSERT_TRUE(Demangle("_ZZ1fIiET_S0_ENKUlTpTyDpT_E_clIJiEEEDaS2_", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "f<>()::{lambda()#1}::operator()<>()"); +} + +TEST(Demangle, LambdaInClassMemberDefaultArgument) { + char tmp[100]; + + // Source: + // + // struct S { + // static auto f(void (*g)() = [] {}) { return g; } + // }; + // void (*p)() = S::f(); + // + // Full LLVM demangling of the lambda call operator: + // + // S::f(void (*)())::'lambda'()::operator()() const + // + // Full GNU binutils demangling: + // + // S::f(void (*)())::{default arg#1}::{lambda()#1}::operator()() const + ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd_NKUlvE_clEv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()"); + + // The same but in the second rightmost default argument. + ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd0_NKUlvE_clEv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "S::f()::{default arg#2}::{lambda()#1}::operator()()"); + + // Reject negative <(parameter) number> values. + ASSERT_FALSE(Demangle("_ZZN1S1fEPFvvEEdn1_NKUlvE_clEv", tmp, sizeof(tmp))); +} + +TEST(Demangle, AvoidSignedOverflowForUnfortunateParameterNumbers) { + char tmp[100]; + + // Here <number> + 2 fits in an int, but just barely. (We expect no such + // input in practice: real functions don't have billions of arguments.) + ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483645_NKUlvE_clEv", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, + "S::f()::{default arg#2147483647}::{lambda()#1}::operator()()"); + + // Now <number> is an int, but <number> + 2 is not. + ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483646_NKUlvE_clEv", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()"); + + // <number> is the largest int. + ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483647_NKUlvE_clEv", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()"); + + // <number> itself does not fit into an int. ParseNumber truncates the value + // to int, yielding a large negative number, which we strain out. + ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483648_NKUlvE_clEv", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()"); +} + +TEST(Demangle, SubstpackNotationForTroublesomeTemplatePack) { + char tmp[100]; + + // Source: + // + // template <template <class> class, template <class> class> struct B {}; + // + // template <template <class> class... T> struct A { + // template <template <class> class... U> void f(B<T, U>&&...) {} + // }; + // + // template void A<>::f<>(); + // + // LLVM can't demangle its own _SUBSTPACK_ notation. + ASSERT_TRUE(Demangle("_ZN1AIJEE1fIJEEEvDpO1BI_SUBSTPACK_T_E", + tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "A<>::f<>()"); +} + +TEST(Demangle, TemplateTemplateParamAppearingAsBackrefFollowedByTemplateArgs) { + char tmp[100]; + + // Source: + // + // template <template <class> class C> struct W { + // template <class T> static decltype(C<T>::m()) f() { return {}; } + // }; + // + // template <class T> struct S { static int m() { return 0; } }; + // template decltype(S<int>::m()) W<S>::f<int>(); + ASSERT_TRUE(Demangle("_ZN1WI1SE1fIiEEDTclsrS0_IT_EE1mEEv", tmp, sizeof(tmp))); + EXPECT_STREQ(tmp, "W<>::f<>()"); +} + // Test corner cases of boundary conditions. TEST(Demangle, CornerCases) { char tmp[10]; @@ -95,6 +566,250 @@ TEST(Demangle, Clones) { EXPECT_FALSE(Demangle("_ZL3Foov.isra.2.constprop.", tmp, sizeof(tmp))); } +TEST(Demangle, Discriminators) { + char tmp[80]; + + // Source: + // + // using Thunk = void (*)(); + // + // Thunk* f() { + // static Thunk thunks[12] = {}; + // + // #define THUNK(i) [backslash here] + // do { struct S { static void g() {} }; thunks[i] = &S::g; } while (0) + // + // THUNK(0); + // [... repeat for 1 to 10 ...] + // THUNK(11); + // + // return thunks; + // } + // + // The test inputs are manglings of some of the S::g member functions. + + // The first one omits the discriminator. + EXPECT_TRUE(Demangle("_ZZ1fvEN1S1gEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f()::S::g()", tmp); + + // The second one encodes 0. + EXPECT_TRUE(Demangle("_ZZ1fvEN1S1gE_0v", tmp, sizeof(tmp))); + EXPECT_STREQ("f()::S::g()", tmp); + + // The eleventh one encodes 9. + EXPECT_TRUE(Demangle("_ZZ1fvEN1S1gE_9v", tmp, sizeof(tmp))); + EXPECT_STREQ("f()::S::g()", tmp); + + // The twelfth one encodes 10 with extra underscores delimiting it. + EXPECT_TRUE(Demangle("_ZZ1fvEN1S1gE__10_v", tmp, sizeof(tmp))); + EXPECT_STREQ("f()::S::g()", tmp); +} + +TEST(Demangle, SingleDigitDiscriminatorFollowedByADigit) { + char tmp[80]; + + // Don't parse 911 as a number. + EXPECT_TRUE(Demangle("_ZZ1fvEN1S1gE_911return_type", tmp, sizeof(tmp))); + EXPECT_STREQ("f()::S::g()", tmp); +} + +TEST(Demangle, LiteralOfGlobalNamespaceEnumType) { + char tmp[80]; + + // void f<(E)42>() + EXPECT_TRUE(Demangle("_Z1fIL1E42EEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, NullptrLiterals) { + char tmp[80]; + + // void f<nullptr>() + EXPECT_TRUE(Demangle("_Z1fILDnEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // also void f<nullptr>() + EXPECT_TRUE(Demangle("_Z1fILDn0EEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, StringLiterals) { + char tmp[80]; + + // void f<"<char const [42]>">() + EXPECT_TRUE(Demangle("_Z1fILA42_KcEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ComplexFloatingPointLiterals) { + char tmp[80]; + + // Source (use g++ -fext-numeric-literals to compile): + // + // using C = double _Complex; + // template <class T> void f(char (&)[sizeof(C{sizeof(T)} + 4.0j)]) {} + // template void f<int>(char (&)[sizeof(C{sizeof(int)} + 4.0j)]); + // + // GNU demangling: + // + // void f<int>(char (&) [sizeof (double _Complex{sizeof (int)}+ + // ((double _Complex)0000000000000000_4010000000000000))]) + EXPECT_TRUE(Demangle( + "_Z1fIiEvRAszpltlCdstT_ELS0_0000000000000000_4010000000000000E_c", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, Float128) { + char tmp[80]; + + // S::operator _Float128() const + EXPECT_TRUE(Demangle("_ZNK1ScvDF128_Ev", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator _Float128()", tmp); +} + +TEST(Demangle, Float128x) { + char tmp[80]; + + // S::operator _Float128x() const + EXPECT_TRUE(Demangle("_ZNK1ScvDF128xEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator _Float128x()", tmp); +} + +TEST(Demangle, Bfloat16) { + char tmp[80]; + + // S::operator std::bfloat16_t() const + EXPECT_TRUE(Demangle("_ZNK1ScvDF16bEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator std::bfloat16_t()", tmp); +} + +TEST(Demangle, SimpleSignedBitInt) { + char tmp[80]; + + // S::operator _BitInt(256)() const + EXPECT_TRUE(Demangle("_ZNK1ScvDB256_Ev", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator _BitInt(256)()", tmp); +} + +TEST(Demangle, SimpleUnsignedBitInt) { + char tmp[80]; + + // S::operator unsigned _BitInt(256)() const + EXPECT_TRUE(Demangle("_ZNK1ScvDU256_Ev", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator unsigned _BitInt(256)()", tmp); +} + +TEST(Demangle, DependentBitInt) { + char tmp[80]; + + // S::operator _BitInt(256)<256>() const + EXPECT_TRUE(Demangle("_ZNK1ScvDBT__ILi256EEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator _BitInt(?)<>()", tmp); +} + +TEST(Demangle, ConversionToPointerType) { + char tmp[80]; + + // S::operator int*() const + EXPECT_TRUE(Demangle("_ZNK1ScvPiEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator int*()", tmp); +} + +TEST(Demangle, ConversionToLvalueReferenceType) { + char tmp[80]; + + // S::operator int&() const + EXPECT_TRUE(Demangle("_ZNK1ScvRiEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator int&()", tmp); +} + +TEST(Demangle, ConversionToRvalueReferenceType) { + char tmp[80]; + + // S::operator int&&() const + EXPECT_TRUE(Demangle("_ZNK1ScvOiEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator int&&()", tmp); +} + +TEST(Demangle, ConversionToComplexFloatingPointType) { + char tmp[80]; + + // S::operator float _Complex() const + EXPECT_TRUE(Demangle("_ZNK1ScvCfEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator float _Complex()", tmp); +} + +TEST(Demangle, ConversionToImaginaryFloatingPointType) { + char tmp[80]; + + // S::operator float _Imaginary() const + EXPECT_TRUE(Demangle("_ZNK1ScvGfEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator float _Imaginary()", tmp); +} + +TEST(Demangle, ConversionToPointerToCvQualifiedType) { + char tmp[80]; + + // S::operator int const volatile restrict*() const + EXPECT_TRUE(Demangle("_ZNK1ScvPrVKiEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator int const volatile restrict*()", tmp); +} + +TEST(Demangle, ConversionToLayeredPointerType) { + char tmp[80]; + + // S::operator int const* const*() const + EXPECT_TRUE(Demangle("_ZNK1ScvPKPKiEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator int const* const*()", tmp); +} + +TEST(Demangle, ConversionToTypeWithExtendedQualifier) { + char tmp[80]; + + // S::operator int const AS128*() const + // + // Because our scan of easy type constructors stops at the extended qualifier, + // the demangling preserves the * but loses the const. + EXPECT_TRUE(Demangle("_ZNK1ScvPU5AS128KiEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator int*()", tmp); +} + +TEST(Demangle, GlobalInitializers) { + char tmp[80]; + + // old form without suffix + EXPECT_TRUE(Demangle("_ZGR1v", tmp, sizeof(tmp))); + EXPECT_STREQ("reference temporary for v", tmp); + + // modern form for the whole initializer + EXPECT_TRUE(Demangle("_ZGR1v_", tmp, sizeof(tmp))); + EXPECT_STREQ("reference temporary for v", tmp); + + // next subobject in depth-first preorder traversal + EXPECT_TRUE(Demangle("_ZGR1v0_", tmp, sizeof(tmp))); + EXPECT_STREQ("reference temporary for v", tmp); + + // subobject with a larger seq-id + EXPECT_TRUE(Demangle("_ZGR1v1Z_", tmp, sizeof(tmp))); + EXPECT_STREQ("reference temporary for v", tmp); +} + +TEST(Demangle, StructuredBindings) { + char tmp[80]; + + // Source: + // + // struct S { int a, b; }; + // const auto& [x, y] = S{1, 2}; + + // [x, y] + EXPECT_TRUE(Demangle("_ZDC1x1yE", tmp, sizeof(tmp))); + + // reference temporary for [x, y] + EXPECT_TRUE(Demangle("_ZGRDC1x1yE_", tmp, sizeof(tmp))); +} + // Test the GNU abi_tag extension. TEST(Demangle, AbiTags) { char tmp[80]; @@ -119,6 +834,1078 @@ TEST(Demangle, AbiTags) { EXPECT_STREQ("C[abi:bar][abi:foo]()", tmp); } +TEST(Demangle, SimpleGnuVectorSize) { + char tmp[80]; + + // Source: + // + // #define VECTOR(size) __attribute__((vector_size(size))) + // void f(int x VECTOR(32)) {} + // + // The attribute's size is a number of bytes. The compiler verifies that this + // value corresponds to a whole number of elements and emits the number of + // elements as a <number> in the mangling. With sizeof(int) == 4, that yields + // 32/4 = 8. + // + // LLVM demangling: + // + // f(int vector[8]) + EXPECT_TRUE(Demangle("_Z1fDv8_i", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); +} + +TEST(Demangle, GnuVectorSizeIsATemplateParameter) { + char tmp[80]; + + // Source: + // + // #define VECTOR(size) __attribute__((vector_size(size))) + // template <int n> void f(int x VECTOR(n)) {} + // template void f<32>(int x VECTOR(32)); + // + // LLVM demangling: + // + // void f<32>(int vector[32]) + // + // Because the size was dependent on a template parameter, it was encoded + // using the general expression encoding. Nothing in the mangling says how + // big the element type is, so the demangler is unable to show the element + // count 8 instead of the byte count 32. Arguably it would have been better + // to make the narrow production encode the byte count, so that nondependent + // and dependent versions of a 32-byte vector would both come out as + // vector[32]. + EXPECT_TRUE(Demangle("_Z1fILi32EEvDvT__i", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, GnuVectorSizeIsADependentOperatorExpression) { + char tmp[80]; + + // Source: + // + // #define VECTOR(size) __attribute__((vector_size(size))) + // template <int n> void f(int x VECTOR(2 * n)) {} + // template void f<32>(int x VECTOR(2 * 32)); + // + // LLVM demangling: + // + // void f<32>(int vector[2 * 32]) + EXPECT_TRUE(Demangle("_Z1fILi32EEvDvmlLi2ET__i", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, SimpleAddressSpace) { + char tmp[80]; + + // Source: + // + // void f(const int __attribute__((address_space(128)))*) {} + // + // LLVM demangling: + // + // f(int const AS128*) + // + // Itanium ABI 5.1.5.1, "Qualified types", notes that address_space is mangled + // nonuniformly as a legacy exception: the number is part of the source-name + // if nondependent but is an expression in template-args if dependent. Thus + // it is a convenient test case for both forms. + EXPECT_TRUE(Demangle("_Z1fPU5AS128Ki", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); +} + +TEST(Demangle, DependentAddressSpace) { + char tmp[80]; + + // Source: + // + // template <int n> void f (const int __attribute__((address_space(n)))*) {} + // template void f<128>(const int __attribute__((address_space(128)))*); + // + // LLVM demangling: + // + // void f<128>(int AS<128>*) + EXPECT_TRUE(Demangle("_Z1fILi128EEvPU2ASIT_Ei", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, TransactionSafeEntryPoint) { + char tmp[80]; + + EXPECT_TRUE(Demangle("_ZGTt1fv", tmp, sizeof(tmp))); + EXPECT_STREQ("transaction clone for f()", tmp); +} + +TEST(Demangle, TransactionSafeFunctionType) { + char tmp[80]; + + // GNU demangling: f(void (*)() transaction_safe) + EXPECT_TRUE(Demangle("_Z1fPDxFvvE", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); +} + +TEST(Demangle, TemplateParameterObject) { + char tmp[80]; + + // Source: + // + // struct S { int x, y; }; + // template <S s, const S* p = &s> void f() {} + // template void f<S{1, 2}>(); + // + // LLVM demangling: + // + // void f<S{1, 2}, &template parameter object for S{1, 2}>() + EXPECT_TRUE(Demangle("_Z1fIXtl1SLi1ELi2EEEXadL_ZTAXtlS0_Li1ELi2EEEEEEvv", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // The name of the object standing alone. + // + // LLVM demangling: template parameter object for S{1, 2} + EXPECT_TRUE(Demangle("_ZTAXtl1SLi1ELi2EEE", tmp, sizeof(tmp))); + EXPECT_STREQ("template parameter object", tmp); +} + +TEST(Demangle, EnableIfAttributeOnGlobalFunction) { + char tmp[80]; + + // int f(long l) __attribute__((enable_if(l >= 0, ""))) { return l; } + // + // f(long) [enable_if:fp >= 0] + EXPECT_TRUE(Demangle("_Z1fUa9enable_ifIXgefL0p_Li0EEEl", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); +} + +TEST(Demangle, EnableIfAttributeOnNamespaceScopeFunction) { + char tmp[80]; + + // namespace ns { + // int f(long l) __attribute__((enable_if(l >= 0, ""))) { return l; } + // } // namespace ns + // + // ns::f(long) [enable_if:fp >= 0] + EXPECT_TRUE(Demangle("_ZN2ns1fEUa9enable_ifIXgefL0p_Li0EEEl", + tmp, sizeof(tmp))); + EXPECT_STREQ("ns::f()", tmp); +} + +TEST(Demangle, EnableIfAttributeOnFunctionTemplate) { + char tmp[80]; + + // template <class T> + // T f(T t) __attribute__((enable_if(t >= T{}, ""))) { return t; } + // template int f<int>(int); + // + // int f<int>(int) [enable_if:fp >= int{}] + EXPECT_TRUE(Demangle("_Z1fIiEUa9enable_ifIXgefL0p_tliEEET_S0_", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ThisPointerInDependentSignature) { + char tmp[80]; + + // decltype(g<int>(this)) S::f<int>() + EXPECT_TRUE(Demangle("_ZN1S1fIiEEDTcl1gIT_EfpTEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("S::f<>()", tmp); +} + +TEST(Demangle, DependentMemberOperatorCall) { + char tmp[80]; + + // decltype(fp.operator()()) f<C>(C) + EXPECT_TRUE(Demangle("_Z1fI1CEDTcldtfp_onclEET_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, TypeNestedUnderDecltype) { + char tmp[80]; + + // Source: + // + // template <class T> struct S { using t = int; }; + // template <class T> decltype(S<T>{})::t f() { return {}; } + // void g() { f<int>(); } + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(S<int>{})::t f<int>() + EXPECT_TRUE(Demangle("_Z1fIiENDTtl1SIT_EEE1tEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ElaboratedTypes) { + char tmp[80]; + + // Source: + // + // template <class T> struct S { class C {}; }; + // template <class T> void f(class S<T>::C) {} + // template void f<int>(class S<int>::C); + // + // LLVM demangling: + // + // void f<int>(struct S<int>::C) + EXPECT_TRUE(Demangle("_Z1fIiEvTsN1SIT_E1CE", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // The like for unions. + EXPECT_TRUE(Demangle("_Z1fIiEvTuN1SIT_E1CE", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // The like for enums. + EXPECT_TRUE(Demangle("_Z1fIiEvTeN1SIT_E1CE", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +// Test subobject-address template parameters. +TEST(Demangle, SubobjectAddresses) { + char tmp[80]; + + // void f<a.<char const at offset 123>>() + EXPECT_TRUE(Demangle("_Z1fIXsoKcL_Z1aE123EEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // void f<&a.<char const at offset 0>>() + EXPECT_TRUE(Demangle("_Z1fIXadsoKcL_Z1aEEEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // void f<&a.<char const at offset 123>>() + EXPECT_TRUE(Demangle("_Z1fIXadsoKcL_Z1aE123EEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // void f<&a.<char const at offset 123>>(), past the end this time + EXPECT_TRUE(Demangle("_Z1fIXadsoKcL_Z1aE123pEEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // void f<&a.<char const at offset 0>>() with union-selectors + EXPECT_TRUE(Demangle("_Z1fIXadsoKcL_Z1aE__1_234EEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // void f<&a.<char const at offset 123>>(), past the end, with union-selector + EXPECT_TRUE(Demangle("_Z1fIXadsoKcL_Z1aE123_456pEEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, Preincrement) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T t) -> decltype(T{++t}) { return t; } + // template auto f<int>(int t) -> decltype(int{++t}); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{++fp}) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_pp_fp_EES0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, Postincrement) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T t) -> decltype(T{t++}) { return t; } + // template auto f<int>(int t) -> decltype(int{t++}); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{fp++}) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_ppfp_EES0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, Predecrement) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T t) -> decltype(T{--t}) { return t; } + // template auto f<int>(int t) -> decltype(int{--t}); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{--fp}) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_mm_fp_EES0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, Postdecrement) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T t) -> decltype(T{t--}) { return t; } + // template auto f<int>(int t) -> decltype(int{t--}); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{fp--}) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_mmfp_EES0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, UnaryFoldExpressions) { + char tmp[80]; + + // Source: + // + // template <bool b> struct S {}; + // + // template <class... T> auto f(T... t) -> S<((sizeof(T) == 4) || ...)> { + // return {}; + // } + // + // void g() { f(1, 2L); } + // + // Full LLVM demangling of the instantiation of f: + // + // S<((sizeof (int) == 4, sizeof (long) == 4) || ...)> f<int, long>(int, long) + EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXfrooeqstT_Li4EEEDpS1_", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // The like with a left fold. + // + // S<(... || (sizeof (int) == 4, sizeof (long) == 4))> f<int, long>(int, long) + EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXflooeqstT_Li4EEEDpS1_", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, BinaryFoldExpressions) { + char tmp[80]; + + // Source: + // + // template <bool b> struct S {}; + // + // template <class... T> auto f(T... t) + // -> S<((sizeof(T) == 4) || ... || false)> { + // return {}; + // } + // + // void g() { f(1, 2L); } + // + // Full LLVM demangling of the instantiation of f: + // + // S<((sizeof (int) == 4, sizeof (long) == 4) || ... || false)> + // f<int, long>(int, long) + EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXfRooeqstT_Li4ELb0EEEDpS1_", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // The like with a left fold. + // + // S<(false || ... || (sizeof (int) == 4, sizeof (long) == 4))> + // f<int, long>(int, long) + EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXfLooLb0EeqstT_Li4EEEDpS1_", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, SizeofPacks) { + char tmp[80]; + + // template <size_t i> struct S {}; + // + // template <class... T> auto f(T... p) -> S<sizeof...(T)> { return {}; } + // template auto f<int, long>(int, long) -> S<2>; + // + // template <class... T> auto g(T... p) -> S<sizeof...(p)> { return {}; } + // template auto g<int, long>(int, long) -> S<2>; + + // S<sizeof...(int, long)> f<int, long>(int, long) + EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXsZT_EEDpT_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // S<sizeof... (fp)> g<int, long>(int, long) + EXPECT_TRUE(Demangle("_Z1gIJilEE1SIXsZfp_EEDpT_", tmp, sizeof(tmp))); + EXPECT_STREQ("g<>()", tmp); +} + +TEST(Demangle, SizeofPackInvolvingAnAliasTemplate) { + char tmp[80]; + + // Source: + // + // template <class... T> using A = char[sizeof...(T)]; + // template <class... U> void f(const A<U..., int>&) {} + // template void f<int>(const A<int, int>&); + // + // Full LLVM demangling of the instantiation of f: + // + // void f<int>(char const (&) [sizeof... (int, int)]) + EXPECT_TRUE(Demangle("_Z1fIJiEEvRAsPDpT_iE_Kc", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, UserDefinedLiteral) { + char tmp[80]; + + // Source: + // + // unsigned long long operator""_lit(unsigned long long x) { return x; } + // + // LLVM demangling: + // + // operator"" _lit(unsigned long long) + EXPECT_TRUE(Demangle("_Zli4_lity", tmp, sizeof(tmp))); + EXPECT_STREQ("operator\"\" _lit()", tmp); +} + +TEST(Demangle, Spaceship) { + char tmp[80]; + + // #include <compare> + // + // struct S { auto operator<=>(const S&) const = default; }; + // auto (S::*f) = &S::operator<=>; // make sure S::operator<=> is emitted + // + // template <class T> auto g(T x, T y) -> decltype(x <=> y) { + // return x <=> y; + // } + // template auto g<S>(S x, S y) -> decltype(x <=> y); + + // S::operator<=>(S const&) const + EXPECT_TRUE(Demangle("_ZNK1SssERKS_", tmp, sizeof(tmp))); + EXPECT_STREQ("S::operator<=>()", tmp); + + // decltype(fp <=> fp0) g<S>(S, S) + EXPECT_TRUE(Demangle("_Z1gI1SEDTssfp_fp0_ET_S2_", tmp, sizeof(tmp))); + EXPECT_STREQ("g<>()", tmp); +} + +TEST(Demangle, CoAwait) { + char tmp[80]; + + // ns::Awaitable::operator co_await() const + EXPECT_TRUE(Demangle("_ZNK2ns9AwaitableawEv", tmp, sizeof(tmp))); + EXPECT_STREQ("ns::Awaitable::operator co_await()", tmp); +} + +TEST(Demangle, VendorExtendedExpressions) { + char tmp[80]; + + // void f<__e()>() + EXPECT_TRUE(Demangle("_Z1fIXu3__eEEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // void f<__e(int, long)>() + EXPECT_TRUE(Demangle("_Z1fIXu3__eilEEEvv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, DirectListInitialization) { + char tmp[80]; + + // template <class T> decltype(T{}) f() { return T{}; } + // template decltype(int{}) f<int>(); + // + // struct XYZ { int x, y, z; }; + // template <class T> decltype(T{1, 2, 3}) g() { return T{1, 2, 3}; } + // template decltype(XYZ{1, 2, 3}) g<XYZ>(); + // + // template <class T> decltype(T{.x = 1, .y = 2, .z = 3}) h() { + // return T{.x = 1, .y = 2, .z = 3}; + // } + // template decltype(XYZ{.x = 1, .y = 2, .z = 3}) h<XYZ>(); + // + // // The following two cases require full C99 designated initializers, + // // not part of C++ but likely available as an extension if you ask your + // // compiler nicely. + // + // struct A { int a[4]; }; + // template <class T> decltype(T{.a[2] = 42}) i() { return T{.a[2] = 42}; } + // template decltype(A{.a[2] = 42}) i<A>(); + // + // template <class T> decltype(T{.a[1 ... 3] = 42}) j() { + // return T{.a[1 ... 3] = 42}; + // } + // template decltype(A{.a[1 ... 3] = 42}) j<A>(); + + // decltype(int{}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_EEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // decltype(XYZ{1, 2, 3}) g<XYZ>() + EXPECT_TRUE(Demangle("_Z1gI3XYZEDTtlT_Li1ELi2ELi3EEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("g<>()", tmp); + + // decltype(XYZ{.x = 1, .y = 2, .z = 3}) h<XYZ>() + EXPECT_TRUE(Demangle("_Z1hI3XYZEDTtlT_di1xLi1Edi1yLi2Edi1zLi3EEEv", + tmp, sizeof(tmp))); + EXPECT_STREQ("h<>()", tmp); + + // decltype(A{.a[2] = 42}) i<A>() + EXPECT_TRUE(Demangle("_Z1iI1AEDTtlT_di1adxLi2ELi42EEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("i<>()", tmp); + + // decltype(A{.a[1 ... 3] = 42}) j<A>() + EXPECT_TRUE(Demangle("_Z1jI1AEDTtlT_di1adXLi1ELi3ELi42EEEv", + tmp, sizeof(tmp))); + EXPECT_STREQ("j<>()", tmp); +} + +TEST(Demangle, SimpleInitializerLists) { + char tmp[80]; + + // Common preamble of source-code examples in this test function: + // + // #include <initializer_list> + // + // template <class T> void g(std::initializer_list<T>) {} + + // Source: + // + // template <class T> auto f() -> decltype(g<T>({})) {} + // template auto f<int>() -> decltype(g<int>({})); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(g<int>({})) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gIT_EilEEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // Source: + // + // template <class T> auto f(T x) -> decltype(g({x})) {} + // template auto f<int>(int x) -> decltype(g({x})); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(g({fp})) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gilfp_EEET_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // Source: + // + // template <class T> auto f(T x, T y) -> decltype(g({x, y})) {} + // template auto f<int>(int x, int y) -> decltype(g({x, y})); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(g({fp, fp0})) f<int>(int, int) + EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gilfp_fp0_EEET_S1_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, BracedListImplicitlyConstructingAClassObject) { + char tmp[80]; + + // Source: + // + // struct S { int v; }; + // void g(S) {} + // template <class T> auto f(T x) -> decltype(g({.v = x})) {} + // template auto f<int>(int x) -> decltype(g({.v = x})); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(g({.v = fp})) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gildi1vfp_EEET_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, SimpleNewExpression) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T}) f() { return T{}; } + // template decltype(int{*new int}) f<int>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(new int)}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_EEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, NewExpressionWithEmptyParentheses) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T()}) f() { return T{}; } + // template decltype(int{*new int()}) f<int>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(new int)}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_piEEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, NewExpressionWithNonemptyParentheses) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T(42)}) f() { return T{}; } + // template decltype(int{*new int(42)}) f<int>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(new int(42))}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_piLi42EEEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, PlacementNewExpression) { + char tmp[80]; + + // Source: + // + // #include <new> + // + // template <class T> auto f(T t) -> decltype(T{*new (&t) T(42)}) { + // return t; + // } + // template auto f<int>(int t) -> decltype(int{*new (&t) int(42)}); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(new(&fp) int(42))}) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denwadfp__S0_piLi42EEEES0_", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, GlobalScopeNewExpression) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*::new T}) f() { return T{}; } + // template decltype(int{*::new int}) f<int>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(::new int)}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_degsnw_S0_EEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, NewExpressionWithEmptyBraces) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T{}}) f() { return T{}; } + // template decltype(int{*new int{}}) f<int>(); + // + // GNU demangling: + // + // decltype (int{*(new int{})}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_ilEEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, NewExpressionWithNonemptyBraces) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T{42}}) f() { return T{}; } + // template decltype(int{*new int{42}}) f<int>(); + // + // GNU demangling: + // + // decltype (int{*(new int{42})}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_ilLi42EEEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, SimpleArrayNewExpression) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T[1]}) f() { return T{}; } + // template decltype(int{*new int[1]}) f<int>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(new[] int)}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_dena_S0_EEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ArrayNewExpressionWithEmptyParentheses) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T[1]()}) f() { return T{}; } + // template decltype(int{*new int[1]()}) f<int>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(new[] int)}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_dena_S0_piEEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ArrayPlacementNewExpression) { + char tmp[80]; + + // Source: + // + // #include <new> + // + // template <class T> auto f(T t) -> decltype(T{*new (&t) T[1]}) { + // return T{}; + // } + // template auto f<int>(int t) -> decltype(int{*new (&t) int[1]}); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(new[](&fp) int)}) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denaadfp__S0_EEES0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, GlobalScopeArrayNewExpression) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*::new T[1]}) f() { return T{}; } + // template decltype(int{*::new int[1]}) f<int>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(int{*(::new[] int)}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_degsna_S0_EEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ArrayNewExpressionWithTwoElementsInBraces) { + char tmp[80]; + + // Source: + // + // template <class T> decltype(T{*new T[2]{1, 2}}) f() { return T{}; } + // template decltype(int{*new int[2]{1, 2}}) f<int>(); + // + // GNU demangling: + // + // decltype (int{*(new int{1, 2})}) f<int>() + EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_dena_S0_ilLi1ELi2EEEEv", + tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, SimpleDeleteExpression) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* p) -> decltype(delete p) {} + // template auto f<int>(int* p) -> decltype(delete p); + // + // LLVM demangling: + // + // decltype(delete fp) f<int>(int*) + EXPECT_TRUE(Demangle("_Z1fIiEDTdlfp_EPT_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, GlobalScopeDeleteExpression) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* p) -> decltype(::delete p) {} + // template auto f<int>(int* p) -> decltype(::delete p); + // + // LLVM demangling: + // + // decltype(::delete fp) f<int>(int*) + EXPECT_TRUE(Demangle("_Z1fIiEDTgsdlfp_EPT_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, SimpleArrayDeleteExpression) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* a) -> decltype(delete[] a) {} + // template auto f<int>(int* a) -> decltype(delete[] a); + // + // LLVM demangling: + // + // decltype(delete[] fp) f<int>(int*) + EXPECT_TRUE(Demangle("_Z1fIiEDTdafp_EPT_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, GlobalScopeArrayDeleteExpression) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* a) -> decltype(::delete[] a) {} + // template auto f<int>(int* a) -> decltype(::delete[] a); + // + // LLVM demangling: + // + // decltype(::delete[] fp) f<int>(int*) + EXPECT_TRUE(Demangle("_Z1fIiEDTgsdafp_EPT_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ReferenceQualifiedFunctionTypes) { + char tmp[80]; + + // void f(void (*)() const &, int) + EXPECT_TRUE(Demangle("_Z1fPKFvvREi", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); + + // void f(void (*)() &&, int) + EXPECT_TRUE(Demangle("_Z1fPFvvOEi", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); + + // void f(void (*)(int&) &, int) + EXPECT_TRUE(Demangle("_Z1fPFvRiREi", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); + + // void f(void (*)(S&&) &&, int) + EXPECT_TRUE(Demangle("_Z1fPFvO1SOEi", tmp, sizeof(tmp))); + EXPECT_STREQ("f()", tmp); +} + +TEST(Demangle, DynamicCast) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* p) -> decltype(dynamic_cast<const T*>(p)) { + // return p; + // } + // struct S {}; + // void g(S* p) { f(p); } + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(dynamic_cast<S const*>(fp)) f<S>(S*) + EXPECT_TRUE(Demangle("_Z1fI1SEDTdcPKT_fp_EPS1_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, StaticCast) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* p) -> decltype(static_cast<const T*>(p)) { + // return p; + // } + // void g(int* p) { f(p); } + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(static_cast<int const*>(fp)) f<int>(int*) + EXPECT_TRUE(Demangle("_Z1fIiEDTscPKT_fp_EPS0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ConstCast) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* p) -> decltype(const_cast<const T*>(p)) { + // return p; + // } + // void g(int* p) { f(p); } + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(const_cast<int const*>(fp)) f<int>(int*) + EXPECT_TRUE(Demangle("_Z1fIiEDTccPKT_fp_EPS0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ReinterpretCast) { + char tmp[80]; + + // Source: + // + // template <class T> auto f(T* p) + // -> decltype(reinterpret_cast<const T*>(p)) { + // return p; + // } + // void g(int* p) { f(p); } + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(reinterpret_cast<int const*>(fp)) f<int>(int*) + EXPECT_TRUE(Demangle("_Z1fIiEDTrcPKT_fp_EPS0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, TypeidType) { + char tmp[80]; + + // Source: + // + // #include <typeinfo> + // + // template <class T> decltype(typeid(T).name()) f(T) { return nullptr; } + // template decltype(typeid(int).name()) f<int>(int); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(typeid (int).name()) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTcldttiT_4nameEES0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, TypeidExpression) { + char tmp[80]; + + // Source: + // + // #include <typeinfo> + // + // template <class T> decltype(typeid(T{}).name()) f(T) { return nullptr; } + // template decltype(typeid(int{}).name()) f<int>(int); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(typeid (int{}).name()) f<int>(int) + EXPECT_TRUE(Demangle("_Z1fIiEDTcldttetlT_E4nameEES0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, AlignofType) { + char tmp[80]; + + // Source: + // + // template <class T> T f(T (&a)[alignof(T)]) { return a[0]; } + // template int f<int>(int (&)[alignof(int)]); + // + // Full LLVM demangling of the instantiation of f: + // + // int f<int>(int (&) [alignof (int)]) + EXPECT_TRUE(Demangle("_Z1fIiET_RAatS0__S0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, AlignofExpression) { + char tmp[80]; + + // Source (note that this uses a GNU extension; it is not standard C++): + // + // template <class T> T f(T (&a)[alignof(T{})]) { return a[0]; } + // template int f<int>(int (&)[alignof(int{})]); + // + // Full LLVM demangling of the instantiation of f: + // + // int f<int>(int (&) [alignof (int{})]) + EXPECT_TRUE(Demangle("_Z1fIiET_RAaztlS0_E_S0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, NoexceptExpression) { + char tmp[80]; + + // Source: + // + // template <class T> void f(T (&a)[noexcept(T{})]) {} + // template void f<int>(int (&)[noexcept(int{})]); + // + // Full LLVM demangling of the instantiation of f: + // + // void f<int>(int (&) [noexcept (int{})]) + EXPECT_TRUE(Demangle("_Z1fIiEvRAnxtlT_E_S0_", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, UnaryThrow) { + char tmp[80]; + + // Source: + // + // template <bool b> decltype(b ? throw b : 0) f() { return 0; } + // template decltype(false ? throw false : 0) f<false>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(false ? throw false : 0) f<false>() + EXPECT_TRUE(Demangle("_Z1fILb0EEDTquT_twT_Li0EEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, NullaryThrow) { + char tmp[80]; + + // Source: + // + // template <bool b> decltype(b ? throw : 0) f() { return 0; } + // template decltype(false ? throw : 0) f<false>(); + // + // Full LLVM demangling of the instantiation of f: + // + // decltype(false ? throw : 0) f<false>() + EXPECT_TRUE(Demangle("_Z1fILb0EEDTquT_trLi0EEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); +} + +TEST(Demangle, ThreadLocalWrappers) { + char tmp[80]; + + EXPECT_TRUE(Demangle("_ZTWN2ns3varE", tmp, sizeof(tmp))); + EXPECT_STREQ("thread-local wrapper routine for ns::var", tmp); + + EXPECT_TRUE(Demangle("_ZTHN2ns3varE", tmp, sizeof(tmp))); + EXPECT_STREQ("thread-local initialization routine for ns::var", tmp); +} + +TEST(Demangle, DubiousSrStSymbols) { + char tmp[80]; + + // GNU demangling (not accepted by LLVM): + // + // S<std::u<char>::v> f<char>() + EXPECT_TRUE(Demangle("_Z1fIcE1SIXsrSt1uIT_E1vEEv", tmp, sizeof(tmp))); + EXPECT_STREQ("f<>()", tmp); + + // A real case from the wild. + // + // GNU demangling (not accepted by LLVM) with line breaks and indentation + // added for readability: + // + // __gnu_cxx::__enable_if<std::__is_char<char>::__value, bool>::__type + // std::operator==<char>( + // std::__cxx11::basic_string<char, std::char_traits<char>, + // std::allocator<char> > const&, + // std::__cxx11::basic_string<char, std::char_traits<char>, + // std::allocator<char> > const&) + EXPECT_TRUE(Demangle( + "_ZSteqIcEN9__gnu_cxx11__enable_if" + "IXsrSt9__is_charIT_E7__valueEbE" + "6__typeE" + "RKNSt7__cxx1112basic_stringIS3_St11char_traitsIS3_ESaIS3_EEESE_", + tmp, sizeof(tmp))); + EXPECT_STREQ("std::operator==<>()", tmp); +} + +// Test one Rust symbol to exercise Demangle's delegation path. Rust demangling +// itself is more thoroughly tested in demangle_rust_test.cc. +TEST(Demangle, DelegatesToDemangleRustSymbolEncoding) { + char tmp[80]; + + EXPECT_TRUE(Demangle("_RNvC8my_crate7my_func", tmp, sizeof(tmp))); + EXPECT_STREQ("my_crate::my_func", tmp); +} + // Tests that verify that Demangle footprint is within some limit. // They are not to be run under sanitizers as the sanitizers increase // stack consumption by about 4x. diff --git a/absl/debugging/internal/elf_mem_image.cc b/absl/debugging/internal/elf_mem_image.cc index 42dcd3cd..2c168309 100644 --- a/absl/debugging/internal/elf_mem_image.cc +++ b/absl/debugging/internal/elf_mem_image.cc @@ -20,8 +20,11 @@ #ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h #include <string.h> + #include <cassert> #include <cstddef> +#include <cstdint> + #include "absl/base/config.h" #include "absl/base/internal/raw_logging.h" @@ -86,20 +89,14 @@ ElfMemImage::ElfMemImage(const void *base) { Init(base); } -int ElfMemImage::GetNumSymbols() const { - if (!hash_) { - return 0; - } - // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash - return static_cast<int>(hash_[1]); -} +uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; } -const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { +const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const { ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); return dynsym_ + index; } -const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { +const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const { ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); return versym_ + index; } @@ -154,7 +151,7 @@ void ElfMemImage::Init(const void *base) { dynstr_ = nullptr; versym_ = nullptr; verdef_ = nullptr; - hash_ = nullptr; + num_syms_ = 0; strsize_ = 0; verdefnum_ = 0; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. @@ -219,12 +216,17 @@ void ElfMemImage::Init(const void *base) { base_as_char - reinterpret_cast<const char *>(link_base_); ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>( static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation); + uint32_t *sysv_hash = nullptr; + uint32_t *gnu_hash = nullptr; for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { const auto value = static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation; switch (dynamic_entry->d_tag) { case DT_HASH: - hash_ = reinterpret_cast<ElfW(Word) *>(value); + sysv_hash = reinterpret_cast<uint32_t *>(value); + break; + case DT_GNU_HASH: + gnu_hash = reinterpret_cast<uint32_t *>(value); break; case DT_SYMTAB: dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); @@ -249,13 +251,38 @@ void ElfMemImage::Init(const void *base) { break; } } - if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || + if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ || !verdef_ || !verdefnum_ || !strsize_) { assert(false); // invalid VDSO // Mark this image as not present. Can not recur infinitely. Init(nullptr); return; } + if (sysv_hash) { + num_syms_ = sysv_hash[1]; + } else { + assert(gnu_hash); + // Compute the number of symbols for DT_GNU_HASH, which is specified by + // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt + uint32_t nbuckets = gnu_hash[0]; + // The buckets array is located after the header (4 uint32) and the bloom + // filter (size_t array of gnu_hash[2] elements). + uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2]; + // Find the chain of the last non-empty bucket. + uint32_t idx = 0; + for (uint32_t i = nbuckets; i > 0;) { + idx = buckets[--i]; + if (idx != 0) break; + } + if (idx != 0) { + // Find the last element of the chain, which has an odd value. + // Add one to get the number of symbols. + uint32_t *chain = buckets + nbuckets - gnu_hash[1]; + while (chain[idx++] % 2 == 0) { + } + } + num_syms_ = idx; + } } bool ElfMemImage::LookupSymbol(const char *name, @@ -300,9 +327,9 @@ bool ElfMemImage::LookupSymbolByAddress(const void *address, return false; } -ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) - : index_(index), image_(image) { -} +ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, + uint32_t index) + : index_(index), image_(image) {} const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { return &info_; @@ -335,7 +362,7 @@ ElfMemImage::SymbolIterator ElfMemImage::end() const { return SymbolIterator(this, GetNumSymbols()); } -void ElfMemImage::SymbolIterator::Update(int increment) { +void ElfMemImage::SymbolIterator::Update(uint32_t increment) { const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); ABSL_RAW_CHECK(image->IsPresent() || increment == 0, ""); if (!image->IsPresent()) { diff --git a/absl/debugging/internal/elf_mem_image.h b/absl/debugging/internal/elf_mem_image.h index e7fe6ab0..19c4952e 100644 --- a/absl/debugging/internal/elf_mem_image.h +++ b/absl/debugging/internal/elf_mem_image.h @@ -22,6 +22,7 @@ // Including this will define the __GLIBC__ macro if glibc is being // used. #include <climits> +#include <cstdint> #include "absl/base/config.h" @@ -82,10 +83,10 @@ class ElfMemImage { bool operator!=(const SymbolIterator &rhs) const; bool operator==(const SymbolIterator &rhs) const; private: - SymbolIterator(const void *const image, int index); - void Update(int incr); + SymbolIterator(const void *const image, uint32_t index); + void Update(uint32_t incr); SymbolInfo info_; - int index_; + uint32_t index_; const void *const image_; }; @@ -94,14 +95,14 @@ class ElfMemImage { void Init(const void *base); bool IsPresent() const { return ehdr_ != nullptr; } const ElfW(Phdr)* GetPhdr(int index) const; - const ElfW(Sym)* GetDynsym(int index) const; - const ElfW(Versym)* GetVersym(int index) const; + const ElfW(Sym) * GetDynsym(uint32_t index) const; + const ElfW(Versym)* GetVersym(uint32_t index) const; const ElfW(Verdef)* GetVerdef(int index) const; const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; const char* GetDynstr(ElfW(Word) offset) const; const void* GetSymAddr(const ElfW(Sym) *sym) const; const char* GetVerstr(ElfW(Word) offset) const; - int GetNumSymbols() const; + uint32_t GetNumSymbols() const; SymbolIterator begin() const; SymbolIterator end() const; @@ -124,8 +125,8 @@ class ElfMemImage { const ElfW(Sym) *dynsym_; const ElfW(Versym) *versym_; const ElfW(Verdef) *verdef_; - const ElfW(Word) *hash_; const char *dynstr_; + uint32_t num_syms_; size_t strsize_; size_t verdefnum_; ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). diff --git a/absl/debugging/internal/stacktrace_aarch64-inl.inc b/absl/debugging/internal/stacktrace_aarch64-inl.inc index 1caf7bbe..b123479b 100644 --- a/absl/debugging/internal/stacktrace_aarch64-inl.inc +++ b/absl/debugging/internal/stacktrace_aarch64-inl.inc @@ -89,6 +89,8 @@ struct StackInfo { static bool InsideSignalStack(void** ptr, const StackInfo* stack_info) { uintptr_t comparable_ptr = reinterpret_cast<uintptr_t>(ptr); + if (stack_info->sig_stack_high == kUnknownStackEnd) + return false; return (comparable_ptr >= stack_info->sig_stack_low && comparable_ptr < stack_info->sig_stack_high); } @@ -122,13 +124,6 @@ static void **NextStackFrame(void **old_frame_pointer, const void *uc, if (pre_signal_frame_pointer >= old_frame_pointer) { new_frame_pointer = pre_signal_frame_pointer; } - // Check that alleged frame pointer is actually readable. This is to - // prevent "double fault" in case we hit the first fault due to e.g. - // stack corruption. - if (!absl::debugging_internal::AddressIsReadable( - new_frame_pointer)) - return nullptr; - } } #endif @@ -136,6 +131,14 @@ static void **NextStackFrame(void **old_frame_pointer, const void *uc, if ((reinterpret_cast<uintptr_t>(new_frame_pointer) & 7) != 0) return nullptr; + // Check that alleged frame pointer is actually readable. This is to + // prevent "double fault" in case we hit the first fault due to e.g. + // stack corruption. + if (!absl::debugging_internal::AddressIsReadable( + new_frame_pointer)) + return nullptr; + } + // Only check the size if both frames are in the same stack. if (InsideSignalStack(new_frame_pointer, stack_info) == InsideSignalStack(old_frame_pointer, stack_info)) { diff --git a/absl/debugging/internal/utf8_for_code_point.cc b/absl/debugging/internal/utf8_for_code_point.cc new file mode 100644 index 00000000..658a3b51 --- /dev/null +++ b/absl/debugging/internal/utf8_for_code_point.cc @@ -0,0 +1,70 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/utf8_for_code_point.h" + +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +// UTF-8 encoding bounds. +constexpr uint32_t kMinSurrogate = 0xd800, kMaxSurrogate = 0xdfff; +constexpr uint32_t kMax1ByteCodePoint = 0x7f; +constexpr uint32_t kMax2ByteCodePoint = 0x7ff; +constexpr uint32_t kMax3ByteCodePoint = 0xffff; +constexpr uint32_t kMaxCodePoint = 0x10ffff; + +} // namespace + +Utf8ForCodePoint::Utf8ForCodePoint(uint64_t code_point) { + if (code_point <= kMax1ByteCodePoint) { + length = 1; + bytes[0] = static_cast<char>(code_point); + return; + } + + if (code_point <= kMax2ByteCodePoint) { + length = 2; + bytes[0] = static_cast<char>(0xc0 | (code_point >> 6)); + bytes[1] = static_cast<char>(0x80 | (code_point & 0x3f)); + return; + } + + if (kMinSurrogate <= code_point && code_point <= kMaxSurrogate) return; + + if (code_point <= kMax3ByteCodePoint) { + length = 3; + bytes[0] = static_cast<char>(0xe0 | (code_point >> 12)); + bytes[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f)); + bytes[2] = static_cast<char>(0x80 | (code_point & 0x3f)); + return; + } + + if (code_point > kMaxCodePoint) return; + + length = 4; + bytes[0] = static_cast<char>(0xf0 | (code_point >> 18)); + bytes[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f)); + bytes[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f)); + bytes[3] = static_cast<char>(0x80 | (code_point & 0x3f)); +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/utf8_for_code_point.h b/absl/debugging/internal/utf8_for_code_point.h new file mode 100644 index 00000000..f23cde6d --- /dev/null +++ b/absl/debugging/internal/utf8_for_code_point.h @@ -0,0 +1,47 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_ +#define ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_ + +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +struct Utf8ForCodePoint { + // Converts a Unicode code point to the corresponding UTF-8 byte sequence. + // Async-signal-safe to support use in symbolizing stack traces from a signal + // handler. + explicit Utf8ForCodePoint(uint64_t code_point); + + // Returns true if the constructor's code_point argument was valid. + bool ok() const { return length != 0; } + + // If code_point was in range, then 1 <= length <= 4, and the UTF-8 encoding + // is found in bytes[0 .. (length - 1)]. If code_point was invalid, then + // length == 0. In either case, the contents of bytes[length .. 3] are + // unspecified. + char bytes[4] = {}; + uint32_t length = 0; +}; + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_ diff --git a/absl/debugging/internal/utf8_for_code_point_test.cc b/absl/debugging/internal/utf8_for_code_point_test.cc new file mode 100644 index 00000000..dd0591ae --- /dev/null +++ b/absl/debugging/internal/utf8_for_code_point_test.cc @@ -0,0 +1,175 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/utf8_for_code_point.h" + +#include <cstdint> + +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0}); + ASSERT_EQ(utf8.length, 1); + EXPECT_EQ(utf8.bytes[0], '\0'); +} + +TEST(Utf8ForCodePointTest, RecognizesAsciiSmallA) { + Utf8ForCodePoint utf8(uint64_t{'a'}); + ASSERT_EQ(utf8.length, 1); + EXPECT_EQ(utf8.bytes[0], 'a'); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestOneByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x7f}); + ASSERT_EQ(utf8.length, 1); + EXPECT_EQ(utf8.bytes[0], '\x7f'); +} + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestTwoByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x80}); + ASSERT_EQ(utf8.length, 2); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xc2)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesSmallNWithTilde) { + Utf8ForCodePoint utf8(uint64_t{0xf1}); + ASSERT_EQ(utf8.length, 2); + const char* want = "ñ"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); +} + +TEST(Utf8ForCodePointTest, RecognizesCapitalPi) { + Utf8ForCodePoint utf8(uint64_t{0x3a0}); + ASSERT_EQ(utf8.length, 2); + const char* want = "Π"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestTwoByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x7ff}); + ASSERT_EQ(utf8.length, 2); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xdf)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestThreeByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x800}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xe0)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xa0)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheChineseCharacterZhong1AsInZhong1Wen2) { + Utf8ForCodePoint utf8(uint64_t{0x4e2d}); + ASSERT_EQ(utf8.length, 3); + const char* want = "中"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); + EXPECT_EQ(utf8.bytes[2], want[2]); +} + +TEST(Utf8ForCodePointTest, RecognizesOneBeforeTheSmallestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xd7ff}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xed)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x9f)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RejectsTheSmallestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xd800}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, RejectsTheLargestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xdfff}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, RecognizesOnePastTheLargestSurrogate) { + Utf8ForCodePoint utf8(uint64_t{0xe000}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xee)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x80)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestThreeByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0xffff}); + ASSERT_EQ(utf8.length, 3); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xef)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xbf)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheSmallestFourByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x10000}); + ASSERT_EQ(utf8.length, 4); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xf0)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x90)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80)); + EXPECT_EQ(utf8.bytes[3], static_cast<char>(0x80)); +} + +TEST(Utf8ForCodePointTest, RecognizesTheJackOfHearts) { + Utf8ForCodePoint utf8(uint64_t{0x1f0bb}); + ASSERT_EQ(utf8.length, 4); + const char* want = "🂻"; + EXPECT_EQ(utf8.bytes[0], want[0]); + EXPECT_EQ(utf8.bytes[1], want[1]); + EXPECT_EQ(utf8.bytes[2], want[2]); + EXPECT_EQ(utf8.bytes[3], want[3]); +} + +TEST(Utf8ForCodePointTest, RecognizesTheLargestFourByteCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x10ffff}); + ASSERT_EQ(utf8.length, 4); + EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xf4)); + EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x8f)); + EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf)); + EXPECT_EQ(utf8.bytes[3], static_cast<char>(0xbf)); +} + +TEST(Utf8ForCodePointTest, RejectsTheSmallestOverlargeCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0x110000}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, RejectsAThroughlyOverlargeCodePoint) { + Utf8ForCodePoint utf8(uint64_t{0xffffffff00000000}); + EXPECT_EQ(utf8.length, 0); +} + +TEST(Utf8ForCodePointTest, OkReturnsTrueForAValidCodePoint) { + EXPECT_TRUE(Utf8ForCodePoint(uint64_t{0}).ok()); +} + +TEST(Utf8ForCodePointTest, OkReturnsFalseForAnInvalidCodePoint) { + EXPECT_FALSE(Utf8ForCodePoint(uint64_t{0xffffffff00000000}).ok()); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl |