aboutsummaryrefslogtreecommitdiff
path: root/absl/debugging/internal
diff options
context:
space:
mode:
Diffstat (limited to 'absl/debugging/internal')
-rw-r--r-- absl/debugging/internal/bounded_utf8_length_sequence.h | 126
-rw-r--r-- absl/debugging/internal/bounded_utf8_length_sequence_test.cc | 126
-rw-r--r-- absl/debugging/internal/decode_rust_punycode.cc | 258
-rw-r--r-- absl/debugging/internal/decode_rust_punycode.h | 55
-rw-r--r-- absl/debugging/internal/decode_rust_punycode_test.cc | 606
-rw-r--r-- absl/debugging/internal/demangle.cc | 1143
-rw-r--r-- absl/debugging/internal/demangle.h | 3
-rw-r--r-- absl/debugging/internal/demangle_rust.cc | 925
-rw-r--r-- absl/debugging/internal/demangle_rust.h | 42
-rw-r--r-- absl/debugging/internal/demangle_rust_test.cc | 584
-rw-r--r-- absl/debugging/internal/demangle_test.cc | 1787
-rw-r--r-- absl/debugging/internal/elf_mem_image.cc | 59
-rw-r--r-- absl/debugging/internal/elf_mem_image.h | 15
-rw-r--r-- absl/debugging/internal/stacktrace_aarch64-inl.inc | 17
-rw-r--r-- absl/debugging/internal/utf8_for_code_point.cc | 70
-rw-r--r-- absl/debugging/internal/utf8_for_code_point.h | 47
-rw-r--r-- absl/debugging/internal/utf8_for_code_point_test.cc | 175
17 files changed, 5922 insertions, 116 deletions
diff --git a/absl/debugging/internal/bounded_utf8_length_sequence.h b/absl/debugging/internal/bounded_utf8_length_sequence.h
new file mode 100644
index 00000000..188e06c4
--- /dev/null
+++ b/absl/debugging/internal/bounded_utf8_length_sequence.h
@@ -0,0 +1,126 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
+#define ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
+
+#include <cstdint>
+
+#include "absl/base/config.h"
+#include "absl/numeric/bits.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+// A sequence of up to max_elements integers between 1 and 4 inclusive, whose
+// insertion operation computes the sum of all the elements before the insertion
+// point. This is useful in decoding Punycode, where one needs to know where in
+// a UTF-8 byte stream the n-th code point begins.
+//
+// BoundedUtf8LengthSequence is async-signal-safe and suitable for use in
+// symbolizing stack traces in a signal handler, provided max_elements is not
+// improvidently large. For inputs of lengths accepted by the Rust demangler,
+// up to a couple hundred code points, InsertAndReturnSumOfPredecessors should
+// run in a few dozen clock cycles, on par with the other arithmetic required
+// for Punycode decoding.
+template <uint32_t max_elements>
+class BoundedUtf8LengthSequence {
+ public:
+ // Constructs an empty sequence.
+ BoundedUtf8LengthSequence() = default;
+
+ // Inserts `utf8_length` at position `index`, shifting any existing elements
+ // at or beyond `index` one position to the right. If the sequence is already
+ // full, the rightmost element is discarded.
+ //
+ // Returns the sum of the elements at positions 0 to `index - 1` inclusive.
+ // If `index` is greater than the number of elements already inserted, the
+ // excess positions in the range count 1 apiece.
+ //
+ // REQUIRES: index < max_elements and 1 <= utf8_length <= 4.
+ uint32_t InsertAndReturnSumOfPredecessors(
+ uint32_t index, uint32_t utf8_length) {
+ // The caller shouldn't pass out-of-bounds inputs, but if it does happen,
+ // clamp the values and try to continue. If we're being called from a
+ // signal handler, the last thing we want to do is crash. Emitting
+ // malformed UTF-8 is a lesser evil.
+ if (index >= max_elements) index = max_elements - 1;
+ if (utf8_length == 0 || utf8_length > 4) utf8_length = 1;
+
+ const uint32_t word_index = index/32;  // each 64-bit word holds 32 elements
+ const uint32_t bit_index = 2 * (index % 32);  // low bit of the 2-bit field
+ const uint64_t ones_bit = uint64_t{1} << bit_index;
+
+ // Compute the sum of predecessors.
+ // - Each value from 1 to 4 is represented by a bit field with value from
+ // 0 to 3, so the desired sum is index plus the sum of the
+ // representations actually stored.
+ // - For each bit field, a set low bit should contribute 1 to the sum, and
+ // a set high bit should contribute 2.
+ // - Another way to say the same thing is that each set bit contributes 1,
+ // and each set high bit contributes an additional 1.
+ // - So the sum we want is index + popcount(everything) + popcount(bits in
+ // odd positions).
+ const uint64_t odd_bits_mask = 0xaaaaaaaaaaaaaaaa;  // high bit of each field
+ const uint64_t lower_seminibbles_mask = ones_bit - 1;  // fields below index
+ const uint64_t higher_seminibbles_mask = ~lower_seminibbles_mask;
+ const uint64_t same_word_bits_below_insertion =
+ rep_[word_index] & lower_seminibbles_mask;
+ int full_popcount = absl::popcount(same_word_bits_below_insertion);
+ int odd_popcount =
+ absl::popcount(same_word_bits_below_insertion & odd_bits_mask);
+ for (uint32_t j = word_index; j > 0; --j) {  // every whole word below
+ const uint64_t word_below_insertion = rep_[j - 1];
+ full_popcount += absl::popcount(word_below_insertion);
+ odd_popcount += absl::popcount(word_below_insertion & odd_bits_mask);
+ }
+ const uint32_t sum_of_predecessors =
+ index + static_cast<uint32_t>(full_popcount + odd_popcount);
+
+ // Now insert utf8_length's representation, shifting successors up one
+ // place. The topmost field of the last word falls off the end.
+ for (uint32_t j = max_elements/32 - 1; j > word_index; --j) {
+ rep_[j] = (rep_[j] << 2) | (rep_[j - 1] >> 62);  // carry from word j - 1
+ }
+ rep_[word_index] =
+ (rep_[word_index] & lower_seminibbles_mask) |
+ (uint64_t{utf8_length - 1} << bit_index) |
+ ((rep_[word_index] & higher_seminibbles_mask) << 2);
+
+ return sum_of_predecessors;
+ }
+
+ private:
+ // If the (32 * i + j)-th element of the represented sequence has the value k
+ // (0 <= j < 32, 1 <= k <= 4), then bits 2 * j and 2 * j + 1 of rep_[i]
+ // contain the seminibble (k - 1).
+ //
+ // In particular, the zero-initialization of rep_ makes positions not holding
+ // any inserted element count as 1 in InsertAndReturnSumOfPredecessors.
+ //
+ // Example: rep_ = {0xb1, ... the rest zeroes ...} represents the sequence
+ // (2, 1, 4, 3, ... the rest 1's ...). Constructing the sequence of Unicode
+ // code points "Àa🂻中" = {U+00C0, U+0061, U+1F0BB, U+4E2D} (among many
+ // other examples) would yield this value of rep_.
+ static_assert(max_elements > 0 && max_elements % 32 == 0,
+ "max_elements must be a positive multiple of 32");
+ uint64_t rep_[max_elements/32] = {};
+};
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
diff --git a/absl/debugging/internal/bounded_utf8_length_sequence_test.cc b/absl/debugging/internal/bounded_utf8_length_sequence_test.cc
new file mode 100644
index 00000000..17a24fd1
--- /dev/null
+++ b/absl/debugging/internal/bounded_utf8_length_sequence_test.cc
@@ -0,0 +1,126 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/bounded_utf8_length_sequence.h"
+
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+namespace {
+
+TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfOneCorrectly) {
+ BoundedUtf8LengthSequence<32> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);  // no predecessors
+ EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 1);  // the stored 1
+}
+
+TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfTwoCorrectly) {
+ BoundedUtf8LengthSequence<32> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0);  // no predecessors
+ EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 2);  // the stored 2
+}
+
+TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfThreeCorrectly) {
+ BoundedUtf8LengthSequence<32> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 3), 0);  // no predecessors
+ EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 3);  // the stored 3
+}
+
+TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfFourCorrectly) {
+ BoundedUtf8LengthSequence<32> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 4), 0);  // no predecessors
+ EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 4);  // the stored 4
+}
+
+TEST(BoundedUtf8LengthSequenceTest, RemembersSeveralAppendedValues) {
+ BoundedUtf8LengthSequence<32> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 4), 1);  // 1
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(2, 2), 5);  // 1 + 4
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(3, 3), 7);  // 1 + 4 + 2
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(4, 1), 10);  // 1 + 4 + 2 + 3
+}
+
+TEST(BoundedUtf8LengthSequenceTest, RemembersSeveralPrependedValues) {
+ BoundedUtf8LengthSequence<32> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 4), 0);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 3), 0);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);  // now 1, 2, 3, 4
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(4, 1), 10);  // 1 + 2 + 3 + 4
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(3, 1), 6);  // 1 + 2 + 3
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(2, 1), 3);  // 1 + 2
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 1);  // 1
+}
+
+TEST(BoundedUtf8LengthSequenceTest, RepeatedInsertsShiftValuesOutTheRightEnd) {
+ BoundedUtf8LengthSequence<32> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0);  // the only non-1
+ for (uint32_t i = 1; i < 31; ++i) {  // the 2 stays below position 31
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0)
+ << "while moving the 2 into position " << i;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 1), 32)
+ << "after moving the 2 into position " << i;
+ }
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0)
+ << "while moving the 2 into position 31";
+ EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 1), 31)  // 2 not counted
+ << "after moving the 2 into position 31";
+}
+
+TEST(BoundedUtf8LengthSequenceTest, InsertsIntoWord1LeaveWord0Untouched) {
+ BoundedUtf8LengthSequence<64> seq;
+ for (uint32_t i = 0; i < 32; ++i) {  // fill positions 0..31 with 2's
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(i, 2), 2 * i)
+ << "at index " << i;
+ }
+ EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 64);  // 32 * 2
+ EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 64);  // still 32 * 2
+}
+
+TEST(BoundedUtf8LengthSequenceTest, InsertsIntoWord0ShiftValuesIntoWord1) {
+ BoundedUtf8LengthSequence<64> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(29, 2), 29);  // 29 ones
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(30, 3), 31);  // 29 + 2
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 4), 34);  // 29 + 2 + 3
+
+ // Pushing two 1's on the front moves the 3 and 4 into the high word.
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);  // 2, 3, 4 at 31..33
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(34, 1), 31 + 2 + 3 + 4);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 31 + 2);
+}
+
+TEST(BoundedUtf8LengthSequenceTest, ValuesAreShiftedCorrectlyAmongThreeWords) {
+ BoundedUtf8LengthSequence<96> seq;
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 3), 31);  // top of word 0
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(63, 4), 62 + 3);  // top of word 1
+
+ // This insertion moves both the 3 and the 4 up a word.
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);  // 3 at 32, 4 at 64
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(65, 1), 63 + 3 + 4);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(64, 1), 63 + 3);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(33, 1), 32 + 3);
+ ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 32);
+}
+
+} // namespace
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/decode_rust_punycode.cc b/absl/debugging/internal/decode_rust_punycode.cc
new file mode 100644
index 00000000..43b46bf9
--- /dev/null
+++ b/absl/debugging/internal/decode_rust_punycode.cc
@@ -0,0 +1,258 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/decode_rust_punycode.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "absl/base/config.h"
+#include "absl/base/nullability.h"
+#include "absl/debugging/internal/bounded_utf8_length_sequence.h"
+#include "absl/debugging/internal/utf8_for_code_point.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+namespace {
+
+// Decoding Punycode requires repeated random-access insertion into a stream of
+// variable-length UTF-8 code-point encodings. We need this to be tolerably
+// fast (no N^2 slowdown for unfortunate inputs), and we can't allocate any data
+// structures on the heap (async-signal-safety).
+//
+// It is pragmatic to impose a moderately low limit on the identifier length and
+// bail out if we ever hit it. Then BoundedUtf8LengthSequence efficiently
+// determines where to insert the next code point, and memmove efficiently makes
+// room for it.
+//
+// The chosen limit is a round number several times larger than identifiers
+// expected in practice, yet still small enough that a memmove of this many
+// UTF-8 characters is not much more expensive than the division and modulus
+// operations that Punycode decoding requires.
+constexpr uint32_t kMaxChars = 256;  // checked before each decoded insertion
+
+// Constants from RFC 3492 section 5.
+constexpr uint32_t kBase = 36, kTMin = 1, kTMax = 26, kSkew = 38, kDamp = 700;
+
+constexpr uint32_t kMaxCodePoint = 0x10ffff;  // highest Unicode code point
+
+// Upper bound on i enforced by ScanNextDelta; see the comments there.
+constexpr uint32_t kMaxI = 1 << 30;
+
+// If punycode_begin .. punycode_end begins with a prefix matching the regular
+// expression [0-9a-zA-Z_]+_, removes that prefix, copies all but the final
+// underscore into out_begin (followed by a NUL), sets num_ascii_chars to the
+// number of prefix bytes copied, and returns true. (Such a prefix represents
+// the nonempty subsequence of ASCII characters in the corresponding plaintext.)
+//
+// If punycode_begin .. punycode_end does not contain an underscore, sets
+// num_ascii_chars to zero and returns true. (The encoding of a plaintext
+// without any ASCII characters does not carry such a prefix.)
+//
+// Returns false and zeroes num_ascii_chars on failure (either parse error or
+// not enough space in the output buffer).
+bool ConsumeOptionalAsciiPrefix(const char*& punycode_begin,
+ const char* const punycode_end,
+ char* const out_begin,
+ char* const out_end,
+ uint32_t& num_ascii_chars) {
+ num_ascii_chars = 0;  // stays zero on every early return below
+
+ // Remember the last underscore if any. Also use the same string scan to
+ // reject any ASCII bytes that do not belong in an identifier, including NUL,
+ // as well as non-ASCII bytes, which should have been delta-encoded instead.
+ int last_underscore = -1;
+ for (int i = 0; i < punycode_end - punycode_begin; ++i) {
+ const char c = punycode_begin[i];
+ if (c == '_') {
+ last_underscore = i;
+ continue;
+ }
+ // We write out the meaning of absl::ascii_isalnum rather than call that
+ // function because its documentation does not promise it will remain
+ // async-signal-safe under future development.
+ if ('a' <= c && c <= 'z') continue;
+ if ('A' <= c && c <= 'Z') continue;
+ if ('0' <= c && c <= '9') continue;
+ return false;
+ }
+
+ // If there was no underscore, that means there were no ASCII characters in
+ // the plaintext, so there is no prefix to consume. Our work is done.
+ if (last_underscore < 0) return true;
+
+ // Otherwise there will be an underscore delimiter somewhere. It can't be
+ // initial because then there would be no ASCII characters to its left, and no
+ // delimiter would have been added in that case.
+ if (last_underscore == 0) return false;
+
+ // Any other position is reasonable. Make sure there's room in the buffer.
+ if (last_underscore + 1 > out_end - out_begin) return false;  // bytes + NUL
+
+ // Consume and write out the ASCII characters.
+ num_ascii_chars = static_cast<uint32_t>(last_underscore);
+ std::memcpy(out_begin, punycode_begin, num_ascii_chars);
+ out_begin[num_ascii_chars] = '\0';
+ punycode_begin += num_ascii_chars + 1;  // skip the prefix and its delimiter
+ return true;
+}
+
+// Returns the value of `c` as a base-36 digit according to RFC 3492 section 5,
+// or -1 if `c` is not such a digit.
+int DigitValue(char c) {
+ if ('0' <= c && c <= '9') return c - '0' + 26;  // '0'..'9' map to 26..35
+ if ('a' <= c && c <= 'z') return c - 'a';  // 'a'..'z' map to 0..25
+ if ('A' <= c && c <= 'Z') return c - 'A';  // uppercase maps like lowercase
+ return -1;
+}
+
+// Consumes the next delta encoding from punycode_begin .. punycode_end, adding
+// the decoded delta to i. Returns true on success. Returns false on parse
+// failure or arithmetic overflow, in which case i may be partially updated.
+bool ScanNextDelta(const char*& punycode_begin, const char* const punycode_end,
+ uint32_t bias, uint32_t& i) {
+ uint64_t w = 1; // 64 bits to prevent overflow in w *= kBase - t
+
+ // "for k = base to infinity in steps of base do begin ... end" in RFC 3492
+ // section 6.2. Each loop iteration scans one digit of the delta.
+ for (uint32_t k = kBase; punycode_begin != punycode_end; k += kBase) {
+ const int digit_value = DigitValue(*punycode_begin++);
+ if (digit_value < 0) return false;
+
+ // Compute this in 64-bit arithmetic so we can check for overflow afterward.
+ const uint64_t new_i = i + static_cast<uint64_t>(digit_value) * w;
+
+ // Valid deltas are bounded by (#chars already emitted) * kMaxCodePoint, but
+ // invalid input could encode an arbitrarily large delta. Nip that in the
+ // bud here.
+ static_assert(
+ kMaxI >= kMaxChars * kMaxCodePoint,
+ "kMaxI is too small to prevent spurious failures on good input");
+ if (new_i > kMaxI) return false;
+
+ static_assert(
+ kMaxI < (uint64_t{1} << 32),
+ "Make kMaxI smaller or i 64 bits wide to prevent silent wraparound");
+ i = static_cast<uint32_t>(new_i);
+
+ // Compute the threshold that determines whether this is the last digit and
+ // (if not) what the next digit's place value will be. This logic from RFC
+ // 3492 section 6.2 is explained in section 3.3.
+ uint32_t t;
+ if (k <= bias + kTMin) {
+ t = kTMin;
+ } else if (k >= bias + kTMax) {
+ t = kTMax;
+ } else {
+ t = k - bias;
+ }
+ if (static_cast<uint32_t>(digit_value) < t) return true;  // final digit
+
+ // If this gets too large, the range check on new_i in the next iteration
+ // will catch it. We know this multiplication will not wrap around because
+ // w is 64 bits wide.
+ w *= kBase - t;
+ }
+ return false;  // input ended before a final digit was seen
+}
+
+} // namespace
+
+absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options) {
+ const char* punycode_begin = options.punycode_begin;
+ const char* const punycode_end = options.punycode_end;
+ char* const out_begin = options.out_begin;
+ char* const out_end = options.out_end;
+
+ // Write a NUL terminator first. The ASCII-prefix copy and every later
+ // memmove keep bumping it along to its new right place.
+ const size_t out_size = static_cast<size_t>(out_end - out_begin);
+ if (out_size == 0) return nullptr;
+ *out_begin = '\0';
+
+ // RFC 3492 section 6.2 begins here. We retain the names of integer variables
+ // appearing in that text.
+ uint32_t n = 128, i = 0, bias = 72, num_chars = 0;
+
+ // If there are any ASCII characters, consume them and their trailing
+ // underscore delimiter.
+ if (!ConsumeOptionalAsciiPrefix(punycode_begin, punycode_end,
+ out_begin, out_end, num_chars)) {
+ return nullptr;
+ }
+ uint32_t total_utf8_bytes = num_chars;  // each ASCII char is one byte
+
+ BoundedUtf8LengthSequence<kMaxChars> utf8_lengths;
+
+ // "while the input is not exhausted do begin ... end"
+ while (punycode_begin != punycode_end) {
+ if (num_chars >= kMaxChars) return nullptr;
+
+ const uint32_t old_i = i;
+
+ if (!ScanNextDelta(punycode_begin, punycode_end, bias, i)) return nullptr;
+
+ // Update bias as in RFC 3492 section 6.1. (We have inlined adapt.)
+ uint32_t delta = i - old_i;
+ delta /= (old_i == 0 ? kDamp : 2);  // old_i is 0 only for the first delta
+ delta += delta/(num_chars + 1);
+ bias = 0;
+ while (delta > ((kBase - kTMin) * kTMax)/2) {
+ delta /= kBase - kTMin;
+ bias += kBase;
+ }
+ bias += ((kBase - kTMin + 1) * delta)/(delta + kSkew);
+
+ // Back in section 6.2, compute the new code point and insertion index.
+ static_assert(
+ kMaxI + kMaxCodePoint < (uint64_t{1} << 32),
+ "Make kMaxI smaller or n 64 bits wide to prevent silent wraparound");
+ n += i/(num_chars + 1);
+ i %= num_chars + 1;
+
+ // To actually insert, we need to convert the code point n to UTF-8 and the
+ // character index i to an index into the byte stream emitted so far. First
+ // prepare the UTF-8 encoding for n, rejecting surrogates, overlarge values,
+ // and anything that won't fit into the remaining output storage.
+ Utf8ForCodePoint utf8_for_code_point(n);
+ if (!utf8_for_code_point.ok()) return nullptr;
+ if (total_utf8_bytes + utf8_for_code_point.length + 1 > out_size) {  // + NUL
+ return nullptr;
+ }
+
+ // Now insert the new character into both our length map and the output.
+ uint32_t n_index =
+ utf8_lengths.InsertAndReturnSumOfPredecessors(
+ i, utf8_for_code_point.length);
+ std::memmove(
+ out_begin + n_index + utf8_for_code_point.length, out_begin + n_index,
+ total_utf8_bytes + 1 - n_index);  // the tail bytes plus the NUL
+ std::memcpy(out_begin + n_index, utf8_for_code_point.bytes,
+ utf8_for_code_point.length);
+ total_utf8_bytes += utf8_for_code_point.length;
+ ++num_chars;
+
+ // Finally, advance to the next state before continuing.
+ ++i;
+ }
+
+ return out_begin + total_utf8_bytes;  // points at the terminating NUL
+}
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/decode_rust_punycode.h b/absl/debugging/internal/decode_rust_punycode.h
new file mode 100644
index 00000000..0ae53ff3
--- /dev/null
+++ b/absl/debugging/internal/decode_rust_punycode.h
@@ -0,0 +1,55 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
+#define ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
+
+#include "absl/base/config.h"
+#include "absl/base/nullability.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+struct DecodeRustPunycodeOptions {
+ const char* punycode_begin;  // first byte of the Punycode input
+ const char* punycode_end;  // one past the last byte of the input
+ char* out_begin;  // first byte of the output buffer
+ char* out_end;  // one past the last byte of the output buffer
+};
+
+// Given Rust Punycode in `punycode_begin .. punycode_end`, writes the
+// corresponding UTF-8 plaintext into `out_begin .. out_end`, followed by a NUL
+// character, and returns a pointer to that final NUL on success. On failure
+// returns a null pointer, and the contents of `out_begin .. out_end` are
+// unspecified.
+//
+// Failure occurs in precisely these cases:
+// - Any input byte does not match [0-9a-zA-Z_].
+// - The first input byte is an underscore, but no other underscore appears in
+// the input.
+// - The delta sequence does not encode valid code-point insertions.
+// - The plaintext and its final NUL do not fit in `out_begin .. out_end`.
+// - The plaintext would contain more than 256 code points.
+//
+// DecodeRustPunycode is async-signal-safe with bounded runtime and a small
+// stack footprint, making it suitable for use in demangling Rust symbol names
+// from a signal handler.
+absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options);
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
diff --git a/absl/debugging/internal/decode_rust_punycode_test.cc b/absl/debugging/internal/decode_rust_punycode_test.cc
new file mode 100644
index 00000000..78d1c332
--- /dev/null
+++ b/absl/debugging/internal/decode_rust_punycode_test.cc
@@ -0,0 +1,606 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/decode_rust_punycode.h"
+
+#include <cstddef>
+#include <cstring>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+namespace {
+
+using ::testing::AllOf;
+using ::testing::Eq;
+using ::testing::IsNull;
+using ::testing::Pointee;
+using ::testing::ResultOf;
+using ::testing::StrEq;
+
+class DecodeRustPunycodeTest : public ::testing::Test {
+ protected:
+ void FillBufferWithNonzeroBytes() {
+ // The choice of nonzero value to fill with is arbitrary. The point is just
+ // to fail tests if DecodeRustPunycode forgets to write the final NUL
+ // character.
+ std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_));
+ }
+
+ DecodeRustPunycodeOptions WithAmpleSpace() {  // output is the whole buffer
+ FillBufferWithNonzeroBytes();
+
+ DecodeRustPunycodeOptions options;
+ options.punycode_begin = punycode_.data();
+ options.punycode_end = punycode_.data() + punycode_.size();
+ options.out_begin = buffer_storage_;
+ options.out_end = buffer_storage_ + sizeof(buffer_storage_);
+ return options;
+ }
+
+ DecodeRustPunycodeOptions WithJustEnoughSpace() {  // plaintext_ plus NUL
+ FillBufferWithNonzeroBytes();
+
+ const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1;
+ DecodeRustPunycodeOptions options;
+ options.punycode_begin = punycode_.data();
+ options.punycode_end = punycode_.data() + punycode_.size();
+ options.out_begin = buffer_storage_ + begin_offset;
+ options.out_end = buffer_storage_ + sizeof(buffer_storage_);
+ return options;
+ }
+
+ DecodeRustPunycodeOptions WithOneByteTooFew() {  // plaintext_ without NUL
+ FillBufferWithNonzeroBytes();
+
+ const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size();
+ DecodeRustPunycodeOptions options;
+ options.punycode_begin = punycode_.data();
+ options.punycode_end = punycode_.data() + punycode_.size();
+ options.out_begin = buffer_storage_ + begin_offset;
+ options.out_end = buffer_storage_ + sizeof(buffer_storage_);
+ return options;
+ }
+
+ // Matches a correct return value of DecodeRustPunycode when `golden` is the
+ // expected plaintext output.
+ auto PointsToTheNulAfter(const std::string& golden) {
+ const size_t golden_size = golden.size();
+ return AllOf(
+ Pointee(Eq('\0')),
+ ResultOf("preceding string body",
+ [golden_size](const char* p) { return p - golden_size; },
+ StrEq(golden)));
+ }
+
+ std::string punycode_;  // input handed to DecodeRustPunycode
+ std::string plaintext_;  // expected output; also sizes the tight buffers
+ char buffer_storage_[1024];
+};
+
+TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) {
+ punycode_ = "";  // no ASCII prefix and no deltas
+ plaintext_ = "";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());  // 0 bytes
+}
+
+TEST_F(DecodeRustPunycodeTest,
+ StripsTheTrailingDelimiterFromAPureRunOfBasicChars) {
+ punycode_ = "foo_";  // ASCII prefix plus delimiter, no deltas
+ plaintext_ = "foo";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) {
+ punycode_ = "foo_bar_";  // the inner '_' is plaintext
+ plaintext_ = "foo_bar";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) {
+ punycode_ = "_foo_";  // leading '_' is plaintext; the final '_' delimits
+ plaintext_ = "_foo";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) {
+ punycode_ = "_foo";  // the only '_' is first, so the ASCII prefix is empty
+
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) {
+ punycode_ = std::string("foo\0bar_", 8);  // explicit length keeps the NUL
+
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) {
+ punycode_ = "foo\007_";  // BEL control character
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "foo-_";  // hyphen
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "foo;_";  // semicolon
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "foo\177_";  // DEL control character
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) {
+ punycode_ = "\x80";  // lowest byte with the high bit set, no delimiter
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "\x80_";  // same byte inside an ASCII prefix
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "\xff";  // highest byte value, no delimiter
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "\xff_";  // same byte inside an ASCII prefix
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, RecognizesU0080) {
+ // a encodes 0, so the output is the smallest non-ASCII code point standing
+ // alone. (U+0080 PAD is not an identifier character, but DecodeRustPunycode
+ // does not check whether non-ASCII characters could belong to an identifier.)
+ punycode_ = "a";
+ plaintext_ = "\xc2\x80";  // UTF-8 encoding of U+0080
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) {
+ // Because bias = 72 for the first code point, any digit but a/A is nonfinal
+ // in one of the first two bytes of a delta sequence.
+ punycode_ = "b";  // digit value 1
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "z";  // digit value 25
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "0";  // digit value 26
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "9";  // digit value 35
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) {
+ punycode_ = "ba";
+ plaintext_ = "\xc2\x81";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) {
+ punycode_ = "ca";
+ plaintext_ = "\xc2\x82";
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "za";
+ plaintext_ = "\xc2\x99";
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "0a";
+ plaintext_ = "\xc2\x9a";
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "1a";
+ plaintext_ = "\xc2\x9b";
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "9a";
+ plaintext_ = "£"; // Pound sign, U+00A3
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+}
+
+TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) {
+ punycode_ = "bb";
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "zz";
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "00";
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+
+ punycode_ = "99";
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) {
+ punycode_ = "bba";
+ plaintext_ = "¤"; // U+00A4
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "cba";
+ plaintext_ = "¥"; // U+00A5
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "zba";
+ plaintext_ = "¼"; // U+00BC
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "0ba";
+ plaintext_ = "½"; // U+00BD
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "1ba";
+ plaintext_ = "¾"; // U+00BE
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+
+ punycode_ = "9ba";
+ plaintext_ = "Æ"; // U+00C6
+ EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+}
+
+// Tests beyond this point use characters allowed in identifiers, so you can
+// prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test
+// input and run it through another Rust demangler to verify that the
+// corresponding golden output is correct.
+
+TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) {
+ punycode_ = "0ca";
+ plaintext_ = "à";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) {
+ punycode_ = "_la_mode_yya";
+ plaintext_ = "à_la_mode";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) {
+ punycode_ = "verre__vin_m4a";
+ plaintext_ = "verre_à_vin";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) {
+ punycode_ = "belt_3na";
+ plaintext_ = "beltà";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) {
+ punycode_ = "0caaaa";
+ plaintext_ = "àààà";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) {
+ punycode_ = "3camsuz";
+ plaintext_ = "ãéïôù";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) {
+ punycode_ = "3caltsx";
+ plaintext_ = "ùéôãï";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) {
+ punycode_ = "fiq";
+ plaintext_ = "中";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) {
+ punycode_ = "fiqaaaa";
+ plaintext_ = "中中中中中";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) {
+ punycode_ = "fiq228c";
+ plaintext_ = "中文";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) {
+ punycode_ = "fiq128c";
+ plaintext_ = "文中";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) {
+ punycode_ = "uy7h";
+ plaintext_ = "🂻";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) {
+ punycode_ = "jack__uh63d";
+ plaintext_ = "jack_🂻";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) {
+ punycode_ = "jack__of_hearts_ki37n";
+ plaintext_ = "jack_🂻_of_hearts";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) {
+ punycode_ = "_of_hearts_kz45i";
+ plaintext_ = "🂻_of_hearts";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) {
+ punycode_ = "uy7haaaa";
+ plaintext_ = "🂻🂻🂻🂻🂻";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) {
+ punycode_ = "8x7hcjmf";
+ plaintext_ = "🂦🂧🂪🂭🂮";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) {
+ punycode_ = "8x7hcild";
+ plaintext_ = "🂮🂦🂭🂪🂧";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) {
+ punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak";
+ plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) {
+ punycode_ = "123456789a";
+
+ EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
+}
+
+// Finally, we test on a few prose and poetry snippets as a defense in depth.
+// If our artificial short test inputs did not exercise a bug that is tickled by
+// patterns typical of real human writing, maybe real human writing will catch
+// that.
+//
+// These test inputs are extracted from texts old enough to be out of copyright
+// that probe a variety of ranges of code-point space. All are longer than 32
+// code points, so they exercise the carrying of seminibbles from one uint64_t
+// to the next higher one in BoundedUtf8LengthSequence.
+
+// The first three lines of the Old English epic _Beowulf_, mostly ASCII with a
+// few archaic two-byte letters interspersed.
+TEST_F(DecodeRustPunycodeTest, Beowulf) {
+ punycode_ = "hwt_we_gardena_in_geardagum_"
+ "eodcyninga_rym_gefrunon_"
+ "hu_a_elingas_ellen_fremedon_hxg9c70do9alau";
+ plaintext_ = "hwæt_we_gardena_in_geardagum_"
+ "þeodcyninga_þrym_gefrunon_"
+ "hu_ða_æþelingas_ellen_fremedon";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+// The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然
+// (Meng Haoran), exercising three-byte-character processing.
+TEST_F(DecodeRustPunycodeTest, MengHaoran) {
+ punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6"
+ "3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta";
+ plaintext_ = "故人具雞黍" "邀我至田家"
+ "綠樹村邊合" "青山郭外斜"
+ "開軒面場圃" "把酒話桑麻"
+ "待到重陽日" "還來就菊花";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+// A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura).
+// Kana and kanji are three-byte chars in distant ranges: a good codec workout.
+TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) {
+ punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0"
+ "em23do0op23cc2ff70mb6tae8aq759gja";
+ plaintext_ = "瓜食めば"
+ "子ども思ほゆ"
+ "栗食めば"
+ "まして偲はゆ"
+ "何処より"
+ "来りしものそ"
+ "眼交に"
+ "もとな懸りて"
+ "安眠し寝さぬ";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+// The first two lines of the Phoenician-language inscription on the sarcophagus
+// of Eshmunazar II of Sidon, 6th century BCE. Phoenician and many other
+// archaic scripts are allocated in the Supplementary Multilingual Plane
+// (U+10000 through U+1FFFF) and thus exercise four-byte-character processing.
+TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) {
+ punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee"
+ "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g";
+ plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓"
+ "𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊"
+ "𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
+ "𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
+ "𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊"
+ "𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕";
+
+ ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
+ PointsToTheNulAfter(plaintext_));
+ ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
+ PointsToTheNulAfter(plaintext_));
+ EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
+}
+
+} // namespace
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/demangle.cc b/absl/debugging/internal/demangle.cc
index 381a2b50..caac7636 100644
--- a/absl/debugging/internal/demangle.cc
+++ b/absl/debugging/internal/demangle.cc
@@ -14,18 +14,19 @@
// For reference check out:
// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
-//
-// Note that we only have partial C++11 support yet.
#include "absl/debugging/internal/demangle.h"
+#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
+#include <cstring>
#include <limits>
#include <string>
#include "absl/base/config.h"
+#include "absl/debugging/internal/demangle_rust.h"
#if ABSL_INTERNAL_HAS_CXA_DEMANGLE
#include <cxxabi.h>
@@ -44,14 +45,16 @@ typedef struct {
// List of operators from Itanium C++ ABI.
static const AbbrevPair kOperatorList[] = {
- // New has special syntax (not currently supported).
+ // New has special syntax.
{"nw", "new", 0},
{"na", "new[]", 0},
- // Works except that the 'gs' prefix is not supported.
+ // Special-cased elsewhere to support the optional gs prefix.
{"dl", "delete", 1},
{"da", "delete[]", 1},
+ {"aw", "co_await", 1},
+
{"ps", "+", 1}, // "positive"
{"ng", "-", 1}, // "negative"
{"ad", "&", 1}, // "address-of"
@@ -79,6 +82,7 @@ static const AbbrevPair kOperatorList[] = {
{"rs", ">>", 2},
{"lS", "<<=", 2},
{"rS", ">>=", 2},
+ {"ss", "<=>", 2},
{"eq", "==", 2},
{"ne", "!=", 2},
{"lt", "<", 2},
@@ -98,6 +102,7 @@ static const AbbrevPair kOperatorList[] = {
{"qu", "?", 3},
{"st", "sizeof", 0}, // Special syntax
{"sz", "sizeof", 1}, // Not a real operator name, but used in expressions.
+ {"sZ", "sizeof...", 0}, // Special syntax
{nullptr, nullptr, 0},
};
@@ -187,9 +192,50 @@ typedef struct {
int recursion_depth; // For stack exhaustion prevention.
int steps; // Cap how much work we'll do, regardless of depth.
ParseState parse_state; // Backtrackable state copied for most frames.
+
+ // Conditionally compiled support for marking the position of the first
+ // construct Demangle couldn't parse. This preprocessor symbol is intended
+ // for use by Abseil demangler maintainers only; its behavior is not part of
+ // Abseil's public interface.
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ int high_water_mark; // Input position where parsing failed.
+ bool too_complex; // True if any guard.IsTooComplex() call returned true.
+#endif
} State;
namespace {
+
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+void UpdateHighWaterMark(State *state) {
+ if (state->high_water_mark < state->parse_state.mangled_idx) {
+ state->high_water_mark = state->parse_state.mangled_idx;
+ }
+}
+
+void ReportHighWaterMark(State *state) {
+ // Write out the mangled name with the trouble point marked, provided that the
+ // output buffer is large enough and the mangled name did not hit a complexity
+ // limit (in which case the high water mark wouldn't point out an unparsable
+ // construct, only the point where a budget ran out).
+ const size_t input_length = std::strlen(state->mangled_begin);
+ if (input_length + 6 > static_cast<size_t>(state->out_end_idx) ||
+ state->too_complex) {
+ if (state->out_end_idx > 0) state->out[0] = '\0';
+ return;
+ }
+ const size_t high_water_mark = static_cast<size_t>(state->high_water_mark);
+ std::memcpy(state->out, state->mangled_begin, high_water_mark);
+ std::memcpy(state->out + high_water_mark, "--!--", 5);
+ std::memcpy(state->out + high_water_mark + 5,
+ state->mangled_begin + high_water_mark,
+ input_length - high_water_mark);
+ state->out[input_length + 5] = '\0';
+}
+#else
+void UpdateHighWaterMark(State *) {}
+void ReportHighWaterMark(State *) {}
+#endif
+
// Prevent deep recursion / stack exhaustion.
// Also prevent unbounded handling of complex inputs.
class ComplexityGuard {
@@ -201,7 +247,7 @@ class ComplexityGuard {
~ComplexityGuard() { --state_->recursion_depth; }
// 256 levels of recursion seems like a reasonable upper limit on depth.
- // 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
+ // 128 is not enough to demangle synthetic tests from demangle_unittest.txt:
// "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
static constexpr int kRecursionDepthLimit = 256;
@@ -222,8 +268,14 @@ class ComplexityGuard {
static constexpr int kParseStepsLimit = 1 << 17;
bool IsTooComplex() const {
- return state_->recursion_depth > kRecursionDepthLimit ||
- state_->steps > kParseStepsLimit;
+ if (state_->recursion_depth > kRecursionDepthLimit ||
+ state_->steps > kParseStepsLimit) {
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ state_->too_complex = true;
+#endif
+ return true;
+ }
+ return false;
}
private:
@@ -270,6 +322,10 @@ static void InitState(State* state,
state->out_end_idx = static_cast<int>(out_size);
state->recursion_depth = 0;
state->steps = 0;
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ state->high_water_mark = 0;
+ state->too_complex = false;
+#endif
state->parse_state.mangled_idx = 0;
state->parse_state.out_cur_idx = 0;
@@ -291,13 +347,14 @@ static bool ParseOneCharToken(State *state, const char one_char_token) {
if (guard.IsTooComplex()) return false;
if (RemainingInput(state)[0] == one_char_token) {
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
return false;
}
-// Returns true and advances "mangled_cur" if we find "two_char_token"
-// at "mangled_cur" position. It is assumed that "two_char_token" does
+// Returns true and advances "mangled_idx" if we find "two_char_token"
+// at "mangled_idx" position. It is assumed that "two_char_token" does
// not contain '\0'.
static bool ParseTwoCharToken(State *state, const char *two_char_token) {
ComplexityGuard guard(state);
@@ -305,11 +362,45 @@ static bool ParseTwoCharToken(State *state, const char *two_char_token) {
if (RemainingInput(state)[0] == two_char_token[0] &&
RemainingInput(state)[1] == two_char_token[1]) {
state->parse_state.mangled_idx += 2;
+ UpdateHighWaterMark(state);
return true;
}
return false;
}
+// Returns true and advances "mangled_idx" if we find "three_char_token"
+// at "mangled_idx" position. It is assumed that "three_char_token" does
+// not contain '\0'.
+static bool ParseThreeCharToken(State *state, const char *three_char_token) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (RemainingInput(state)[0] == three_char_token[0] &&
+ RemainingInput(state)[1] == three_char_token[1] &&
+ RemainingInput(state)[2] == three_char_token[2]) {
+ state->parse_state.mangled_idx += 3;
+ UpdateHighWaterMark(state);
+ return true;
+ }
+ return false;
+}
+
+// Returns true and advances "mangled_idx" if we find a copy of the
+// NUL-terminated string "long_token" at "mangled_idx" position.
+static bool ParseLongToken(State *state, const char *long_token) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ int i = 0;
+ for (; long_token[i] != '\0'; ++i) {
+ // Note that we cannot run off the end of the NUL-terminated input here.
+ // Inside the loop body, long_token[i] is known to be different from NUL.
+ // So if we read the NUL on the end of the input here, we return at once.
+ if (RemainingInput(state)[i] != long_token[i]) return false;
+ }
+ state->parse_state.mangled_idx += i;
+ UpdateHighWaterMark(state);
+ return true;
+}
+
// Returns true and advances "mangled_cur" if we find any character in
// "char_class" at "mangled_cur" position.
static bool ParseCharClass(State *state, const char *char_class) {
@@ -322,6 +413,7 @@ static bool ParseCharClass(State *state, const char *char_class) {
for (; *p != '\0'; ++p) {
if (RemainingInput(state)[0] == *p) {
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
}
@@ -554,6 +646,7 @@ static bool ParseFloatNumber(State *state);
static bool ParseSeqId(State *state);
static bool ParseIdentifier(State *state, size_t length);
static bool ParseOperatorName(State *state, int *arity);
+static bool ParseConversionOperatorType(State *state);
static bool ParseSpecialName(State *state);
static bool ParseCallOffset(State *state);
static bool ParseNVOffset(State *state);
@@ -563,21 +656,33 @@ static bool ParseCtorDtorName(State *state);
static bool ParseDecltype(State *state);
static bool ParseType(State *state);
static bool ParseCVQualifiers(State *state);
+static bool ParseExtendedQualifier(State *state);
static bool ParseBuiltinType(State *state);
+static bool ParseVendorExtendedType(State *state);
static bool ParseFunctionType(State *state);
static bool ParseBareFunctionType(State *state);
+static bool ParseOverloadAttribute(State *state);
static bool ParseClassEnumType(State *state);
static bool ParseArrayType(State *state);
static bool ParsePointerToMemberType(State *state);
static bool ParseTemplateParam(State *state);
+static bool ParseTemplateParamDecl(State *state);
static bool ParseTemplateTemplateParam(State *state);
static bool ParseTemplateArgs(State *state);
static bool ParseTemplateArg(State *state);
static bool ParseBaseUnresolvedName(State *state);
static bool ParseUnresolvedName(State *state);
+static bool ParseUnresolvedQualifierLevel(State *state);
+static bool ParseUnionSelector(State* state);
+static bool ParseFunctionParam(State* state);
+static bool ParseBracedExpression(State *state);
static bool ParseExpression(State *state);
+static bool ParseInitializer(State *state);
static bool ParseExprPrimary(State *state);
-static bool ParseExprCastValue(State *state);
+static bool ParseExprCastValueAndTrailingE(State *state);
+static bool ParseQRequiresClauseExpr(State *state);
+static bool ParseRequirement(State *state);
+static bool ParseTypeConstraint(State *state);
static bool ParseLocalName(State *state);
static bool ParseLocalNameSuffix(State *state);
static bool ParseDiscriminator(State *state);
@@ -622,22 +727,34 @@ static bool ParseMangledName(State *state) {
}
// <encoding> ::= <(function) name> <bare-function-type>
+// [`Q` <requires-clause expr>]
// ::= <(data) name>
// ::= <special-name>
+//
+// NOTE: Based on http://shortn/_Hoq9qG83rx
static bool ParseEncoding(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- // Implementing the first two productions together as <name>
- // [<bare-function-type>] avoids exponential blowup of backtracking.
+ // Since the first two productions both start with <name>, attempt
+ // to parse it only once to avoid exponential blowup of backtracking.
//
- // Since Optional(...) can't fail, there's no need to copy the state for
- // backtracking.
- if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
+ // We're careful about exponential blowup because <encoding> recursively
+ // appears in other productions downstream of its first two productions,
+ // which means that every call to `ParseName` would possibly indirectly
+ // result in two calls to `ParseName` etc.
+ if (ParseName(state)) {
+ if (!ParseBareFunctionType(state)) {
+ return true; // <(data) name>
+ }
+
+ // Parsed: <(function) name> <bare-function-type>
+ // Pending: [`Q` <requires-clause expr>]
+ ParseQRequiresClauseExpr(state); // restores state on failure
return true;
}
if (ParseSpecialName(state)) {
- return true;
+ return true; // <special-name>
}
return false;
}
@@ -723,19 +840,26 @@ static bool ParseNestedName(State *state) {
// <prefix> ::= <prefix> <unqualified-name>
// ::= <template-prefix> <template-args>
// ::= <template-param>
+// ::= <decltype>
// ::= <substitution>
// ::= # empty
// <template-prefix> ::= <prefix> <(template) unqualified-name>
// ::= <template-param>
// ::= <substitution>
+// ::= <vendor-extended-type>
static bool ParsePrefix(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
bool has_something = false;
while (true) {
MaybeAppendSeparator(state);
- if (ParseTemplateParam(state) ||
+ if (ParseTemplateParam(state) || ParseDecltype(state) ||
ParseSubstitution(state, /*accept_std=*/true) ||
+ // Although the official grammar does not mention it, nested-names
+ // shaped like Nu14__some_builtinIiE6memberE occur in practice, and it
+ // is not clear what else a compiler is supposed to do when a
+ // vendor-extended type has named members.
+ ParseVendorExtendedType(state) ||
ParseUnscopedName(state) ||
(ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
has_something = true;
@@ -757,8 +881,14 @@ static bool ParsePrefix(State *state) {
// ::= <source-name> [<abi-tags>]
// ::= <local-source-name> [<abi-tags>]
// ::= <unnamed-type-name> [<abi-tags>]
+// ::= DC <source-name>+ E # C++17 structured binding
+// ::= F <source-name> # C++20 constrained friend
+// ::= F <operator-name> # C++20 constrained friend
//
// <local-source-name> is a GCC extension; see below.
+//
+// For the F notation for constrained friends, see
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/24#issuecomment-1491130332.
static bool ParseUnqualifiedName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -767,6 +897,23 @@ static bool ParseUnqualifiedName(State *state) {
ParseUnnamedTypeName(state)) {
return ParseAbiTags(state);
}
+
+ // DC <source-name>+ E
+ ParseState copy = state->parse_state;
+ if (ParseTwoCharToken(state, "DC") && OneOrMore(ParseSourceName, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // F <source-name>
+ // F <operator-name>
+ if (ParseOneCharToken(state, 'F') && MaybeAppend(state, "friend ") &&
+ (ParseSourceName(state) || ParseOperatorName(state, nullptr))) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -824,7 +971,11 @@ static bool ParseLocalSourceName(State *state) {
// <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
// ::= <closure-type-name>
// <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
-// <lambda-sig> ::= <(parameter) type>+
+// <lambda-sig> ::= <template-param-decl>* <(parameter) type>+
+//
+// For <template-param-decl>* in <lambda-sig> see:
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/31
static bool ParseUnnamedTypeName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -847,6 +998,7 @@ static bool ParseUnnamedTypeName(State *state) {
// Closure type.
which = -1;
if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
+ ZeroOrMore(ParseTemplateParamDecl, state) &&
OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
@@ -888,6 +1040,7 @@ static bool ParseNumber(State *state, int *number_out) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
if (number_out != nullptr) {
// Note: possibly truncate "number".
*number_out = static_cast<int>(number);
@@ -910,6 +1063,7 @@ static bool ParseFloatNumber(State *state) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
return true;
}
return false;
@@ -928,6 +1082,7 @@ static bool ParseSeqId(State *state) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
return true;
}
return false;
@@ -946,11 +1101,13 @@ static bool ParseIdentifier(State *state, size_t length) {
MaybeAppendWithLength(state, RemainingInput(state), length);
}
state->parse_state.mangled_idx += length;
+ UpdateHighWaterMark(state);
return true;
}
// <operator-name> ::= nw, and other two letters cases
// ::= cv <type> # (cast)
+// ::= li <source-name> # C++11 user-defined literal
// ::= v <digit> <source-name> # vendor extended operator
static bool ParseOperatorName(State *state, int *arity) {
ComplexityGuard guard(state);
@@ -961,7 +1118,7 @@ static bool ParseOperatorName(State *state, int *arity) {
// First check with "cv" (cast) case.
ParseState copy = state->parse_state;
if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
- EnterNestedName(state) && ParseType(state) &&
+ EnterNestedName(state) && ParseConversionOperatorType(state) &&
LeaveNestedName(state, copy.nest_level)) {
if (arity != nullptr) {
*arity = 1;
@@ -970,6 +1127,13 @@ static bool ParseOperatorName(State *state, int *arity) {
}
state->parse_state = copy;
+ // Then user-defined literals.
+ if (ParseTwoCharToken(state, "li") && MaybeAppend(state, "operator\"\" ") &&
+ ParseSourceName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// Then vendor extended operators.
if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
ParseSourceName(state)) {
@@ -997,36 +1161,120 @@ static bool ParseOperatorName(State *state, int *arity) {
}
MaybeAppend(state, p->real_name);
state->parse_state.mangled_idx += 2;
+ UpdateHighWaterMark(state);
return true;
}
}
return false;
}
+// <operator-name> ::= cv <type> # (cast)
+//
+// The name of a conversion operator is the one place where cv-qualifiers, *, &,
+// and other simple type combinators are expected to appear in our stripped-down
+// demangling (elsewhere they appear in function signatures or template
+// arguments, which we omit from the output). We make reasonable efforts to
+// render simple cases accurately.
+static bool ParseConversionOperatorType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ // Scan pointers, const, and other easy mangling prefixes with postfix
+ // demanglings. Remember the range of input for later rescanning.
+ //
+ // See `ParseType` and the `switch` below for the meaning of each char.
+ const char* begin_simple_prefixes = RemainingInput(state);
+ while (ParseCharClass(state, "OPRCGrVK")) {}
+ const char* end_simple_prefixes = RemainingInput(state);
+
+ // Emit the base type first.
+ if (!ParseType(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+
+ // Then rescan the easy type combinators in reverse order to emit their
+ // demanglings in the expected output order.
+ while (begin_simple_prefixes != end_simple_prefixes) {
+ switch (*--end_simple_prefixes) {
+ case 'P':
+ MaybeAppend(state, "*");
+ break;
+ case 'R':
+ MaybeAppend(state, "&");
+ break;
+ case 'O':
+ MaybeAppend(state, "&&");
+ break;
+ case 'C':
+ MaybeAppend(state, " _Complex");
+ break;
+ case 'G':
+ MaybeAppend(state, " _Imaginary");
+ break;
+ case 'r':
+ MaybeAppend(state, " restrict");
+ break;
+ case 'V':
+ MaybeAppend(state, " volatile");
+ break;
+ case 'K':
+ MaybeAppend(state, " const");
+ break;
+ }
+ }
+ return true;
+}
+
// <special-name> ::= TV <type>
// ::= TT <type>
// ::= TI <type>
// ::= TS <type>
-// ::= TH <type> # thread-local
+// ::= TW <name> # thread-local wrapper
+// ::= TH <name> # thread-local initialization
// ::= Tc <call-offset> <call-offset> <(base) encoding>
// ::= GV <(object) name>
+// ::= GR <(object) name> [<seq-id>] _
// ::= T <call-offset> <(base) encoding>
+// ::= GTt <encoding> # transaction-safe entry point
+// ::= TA <template-arg> # nontype template parameter object
// G++ extensions:
// ::= TC <type> <(offset) number> _ <(base) type>
// ::= TF <type>
// ::= TJ <type>
-// ::= GR <name>
+// ::= GR <name> # without final _, perhaps an earlier form?
// ::= GA <encoding>
// ::= Th <call-offset> <(base) encoding>
// ::= Tv <call-offset> <(base) encoding>
//
-// Note: we don't care much about them since they don't appear in
-// stack traces. The are special data.
+// Note: Most of these are special data, not functions that occur in stack
+// traces. Exceptions are TW and TH, which denote functions supporting the
+// thread_local feature. For these see:
+//
+// https://maskray.me/blog/2021-02-14-all-about-thread-local-storage
+//
+// For TA see https://github.com/itanium-cxx-abi/cxx-abi/issues/63.
static bool ParseSpecialName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") &&
+
+ if (ParseTwoCharToken(state, "TW")) {
+ MaybeAppend(state, "thread-local wrapper routine for ");
+ if (ParseName(state)) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ if (ParseTwoCharToken(state, "TH")) {
+ MaybeAppend(state, "thread-local initialization routine for ");
+ if (ParseName(state)) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTIS") &&
ParseType(state)) {
return true;
}
@@ -1064,21 +1312,51 @@ static bool ParseSpecialName(State *state) {
}
state->parse_state = copy;
- if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
+ // <special-name> ::= GR <(object) name> [<seq-id>] _ # modern standard
+ // ::= GR <(object) name> # also recognized
+ if (ParseTwoCharToken(state, "GR")) {
+ MaybeAppend(state, "reference temporary for ");
+ if (!ParseName(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ const bool has_seq_id = ParseSeqId(state);
+ const bool has_underscore = ParseOneCharToken(state, '_');
+ if (has_seq_id && !has_underscore) {
+ state->parse_state = copy;
+ return false;
+ }
return true;
}
- state->parse_state = copy;
if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
return true;
}
state->parse_state = copy;
+ if (ParseThreeCharToken(state, "GTt") &&
+ MaybeAppend(state, "transaction clone for ") && ParseEncoding(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
ParseCallOffset(state) && ParseEncoding(state)) {
return true;
}
state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "TA")) {
+ bool append = state->parse_state.append;
+ DisableAppend(state);
+ if (ParseTemplateArg(state)) {
+ RestoreAppend(state, append);
+ MaybeAppend(state, "template parameter object");
+ return true;
+ }
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1182,7 +1460,6 @@ static bool ParseDecltype(State *state) {
// ::= O <type> # rvalue reference-to (C++0x)
// ::= C <type> # complex pair (C 2000)
// ::= G <type> # imaginary (C 2000)
-// ::= U <source-name> <type> # vendor extended type qualifier
// ::= <builtin-type>
// ::= <function-type>
// ::= <class-enum-type> # note: just an alias for <name>
@@ -1193,7 +1470,9 @@ static bool ParseDecltype(State *state) {
// ::= <decltype>
// ::= <substitution>
// ::= Dp <type> # pack expansion of (C++0x)
-// ::= Dv <num-elems> _ # GNU vector extension
+// ::= Dv <(elements) number> _ <type> # GNU vector extension
+// ::= Dv <(bytes) expression> _ <type>
+// ::= Dk <type-constraint> # constrained auto
//
static bool ParseType(State *state) {
ComplexityGuard guard(state);
@@ -1236,12 +1515,6 @@ static bool ParseType(State *state) {
}
state->parse_state = copy;
- if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
- ParseType(state)) {
- return true;
- }
- state->parse_state = copy;
-
if (ParseBuiltinType(state) || ParseFunctionType(state) ||
ParseClassEnumType(state) || ParseArrayType(state) ||
ParsePointerToMemberType(state) || ParseDecltype(state) ||
@@ -1260,54 +1533,160 @@ static bool ParseType(State *state) {
return true;
}
+ // GNU vector extension Dv <number> _ <type>
if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
- ParseOneCharToken(state, '_')) {
+ ParseOneCharToken(state, '_') && ParseType(state)) {
return true;
}
state->parse_state = copy;
- return false;
+ // GNU vector extension Dv <expression> _ <type>
+ if (ParseTwoCharToken(state, "Dv") && ParseExpression(state) &&
+ ParseOneCharToken(state, '_') && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Dk") && ParseTypeConstraint(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // For this notation see CXXNameMangler::mangleType in Clang's source code.
+ // The relevant logic and its comment "not clear how to mangle this!" date
+ // from 2011, so it may be with us awhile.
+ return ParseLongToken(state, "_SUBSTPACK_");
}
+// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
// <CV-qualifiers> ::= [r] [V] [K]
+//
// We don't allow empty <CV-qualifiers> to avoid infinite loop in
// ParseType().
static bool ParseCVQualifiers(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
int num_cv_qualifiers = 0;
+ while (ParseExtendedQualifier(state)) ++num_cv_qualifiers;  // every extended qualifier counts toward the non-empty check
num_cv_qualifiers += ParseOneCharToken(state, 'r');
num_cv_qualifiers += ParseOneCharToken(state, 'V');
num_cv_qualifiers += ParseOneCharToken(state, 'K');
return num_cv_qualifiers > 0;  // an empty qualifier list is reported as failure
}
+// <extended-qualifier> ::= U <source-name> [<template-args>]
+static bool ParseExtendedQualifier(State *state) {  // parses U <source-name> [<template-args>] with appending disabled
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // snapshot for the failure path below
+
+ if (!ParseOneCharToken(state, 'U')) return false;  // no restore on this path; presumably nothing was consumed - TODO confirm
+
+ bool append = state->parse_state.append;  // remembered so RestoreAppend can reinstate it
+ DisableAppend(state);
+ if (!ParseSourceName(state)) {
+ state->parse_state = copy;  // assigns the whole saved state back, including the append field read above
+ return false;
+ }
+ Optional(ParseTemplateArgs(state));  // result ignored: the template args are optional in the grammar
+ RestoreAppend(state, append);
+ return true;
+}
+
// <builtin-type> ::= v, etc. # single-character builtin types
-// ::= u <source-name>
+// ::= <vendor-extended-type>
// ::= Dd, etc. # two-character builtin types
+// ::= DB (<number> | <expression>) _ # _BitInt(N)
+// ::= DU (<number> | <expression>) _ # unsigned _BitInt(N)
+// ::= DF <number> _ # _FloatN (N bits)
+// ::= DF <number> x # _FloatNx
+// ::= DF16b # std::bfloat16_t
//
// Not supported:
-// ::= DF <number> _ # _FloatN (N bits)
-//
+// ::= [DS] DA <fixed-point-size>
+// ::= [DS] DR <fixed-point-size>
+// because real implementations of N1169 fixed-point are scant.
static bool ParseBuiltinType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- const AbbrevPair *p;
- for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
+ ParseState copy = state->parse_state;  // restored on each failure path of the DB/DU and DF branches
+
+ // DB (<number> | <expression>) _ # _BitInt(N)
+ // DU (<number> | <expression>) _ # unsigned _BitInt(N)
+ if (ParseTwoCharToken(state, "DB") ||
+ (ParseTwoCharToken(state, "DU") && MaybeAppend(state, "unsigned "))) {  // the DU arm appends its prefix before the shared logic
+ bool append = state->parse_state.append;
+ DisableAppend(state);  // the size is parsed with appending disabled
+ int number = -1;  // sentinel; the `number >= 0` test below assumes ParseNumber leaves it alone on failure - TODO confirm
+ if (!ParseNumber(state, &number) && !ParseExpression(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ RestoreAppend(state, append);
+
+ if (!ParseOneCharToken(state, '_')) {  // the terminating underscore is mandatory
+ state->parse_state = copy;
+ return false;
+ }
+
+ MaybeAppend(state, "_BitInt(");
+ if (number >= 0) {
+ MaybeAppendDecimal(state, number);
+ } else {
+ MaybeAppend(state, "?"); // the best we can do for dependent sizes
+ }
+ MaybeAppend(state, ")");
+ return true;
+ }
+
+ // DF <number> _ # _FloatN
+ // DF <number> x # _FloatNx
+ // DF16b # std::bfloat16_t
+ if (ParseTwoCharToken(state, "DF")) {
+ if (ParseThreeCharToken(state, "16b")) {  // checked before the generic <number> forms
+ MaybeAppend(state, "std::bfloat16_t");
+ return true;
+ }
+ int number = 0;
+ if (!ParseNumber(state, &number)) {
+ state->parse_state = copy;
+ return false;
+ }
+ MaybeAppend(state, "_Float");
+ MaybeAppendDecimal(state, number);
+ if (ParseOneCharToken(state, 'x')) {
+ MaybeAppend(state, "x");
+ return true;
+ }
+ if (ParseOneCharToken(state, '_')) return true;
+ state->parse_state = copy;  // neither 'x' nor '_' followed the number
+ return false;
+ }
+
+ for (const AbbrevPair *p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
// Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
if (p->abbrev[1] == '\0') {
if (ParseOneCharToken(state, p->abbrev[0])) {
MaybeAppend(state, p->real_name);
- return true;
+ return true; // ::= v, etc. # single-character builtin types
}
} else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
MaybeAppend(state, p->real_name);
- return true;
+ return true; // ::= Dd, etc. # two-character builtin types
}
}
+ return ParseVendorExtendedType(state);  // last alternative, tried once the table has no match
+}
+
+// <vendor-extended-type> ::= u <source-name> [<template-args>]
+static bool ParseVendorExtendedType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
+ if (ParseOneCharToken(state, 'u') && ParseSourceName(state) &&
+ Optional(ParseTemplateArgs(state))) {
return true;
}
state->parse_state = copy;
@@ -1342,28 +1721,44 @@ static bool ParseExceptionSpec(State *state) {
return false;
}
-// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E
+// <function-type> ::=
+// [exception-spec] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E
+//
+// <ref-qualifier> ::= R | O
static bool ParseFunctionType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') &&
- Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
- Optional(ParseOneCharToken(state, 'O')) &&
- ParseOneCharToken(state, 'E')) {
- return true;
+ Optional(ParseExceptionSpec(state));  // optional prefix; result not checked
+ Optional(ParseTwoCharToken(state, "Dx"));  // optional Dx marker; result not checked
+ if (!ParseOneCharToken(state, 'F')) {  // 'F' is the first mandatory token
+ state->parse_state = copy;
+ return false;
}
- state->parse_state = copy;
- return false;
+ Optional(ParseOneCharToken(state, 'Y'));
+ if (!ParseBareFunctionType(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ Optional(ParseCharClass(state, "RO"));  // optional ref-qualifier, either R or O
+ if (!ParseOneCharToken(state, 'E')) {  // closing 'E' is mandatory
+ state->parse_state = copy;
+ return false;
+ }
+ return true;
}
-// <bare-function-type> ::= <(signature) type>+
+// <bare-function-type> ::= <overload-attribute>* <(signature) type>+
+//
+// The <overload-attribute>* prefix is nonstandard; see the comment on
+// ParseOverloadAttribute.
static bool ParseBareFunctionType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
DisableAppend(state);
- if (OneOrMore(ParseType, state)) {
+ if (ZeroOrMore(ParseOverloadAttribute, state) &&
+ OneOrMore(ParseType, state)) {
RestoreAppend(state, copy.append);
MaybeAppend(state, "()");
return true;
@@ -1372,11 +1767,43 @@ static bool ParseBareFunctionType(State *state) {
return false;
}
+// <overload-attribute> ::= Ua <name>
+//
+// The nonstandard <overload-attribute> production is sufficient to accept the
+// current implementation of __attribute__((enable_if(condition, "message")))
+// and future attributes of a similar shape. See
+// https://clang.llvm.org/docs/AttributeReference.html#enable-if and the
+// definition of CXXNameMangler::mangleFunctionEncodingBareType in Clang's
+// source code.
+static bool ParseOverloadAttribute(State *state) {  // accepts the two-char token Ua followed by a name
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // snapshot for backtracking
+ if (ParseTwoCharToken(state, "Ua") && ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;  // undo a partial match, e.g. Ua accepted but the name rejected
+ return false;
+}
+
// <class-enum-type> ::= <name>
+// ::= Ts <name> # struct Name or class Name
+// ::= Tu <name> # union Name
+// ::= Te <name> # enum Name
+//
+// See http://shortn/_W3YrltiEd0.
static bool ParseClassEnumType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- return ParseName(state);
+ ParseState copy = state->parse_state;  // needed now that a prefix may be accepted before the name
+ if (Optional(ParseTwoCharToken(state, "Ts") ||
+ ParseTwoCharToken(state, "Tu") ||
+ ParseTwoCharToken(state, "Te")) &&  // at most one of the three prefixes; none is also fine
+ ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;  // drop an accepted prefix when the name fails
+ return false;
}
// <array-type> ::= A <(positive dimension) number> _ <(element) type>
@@ -1413,21 +1840,83 @@ static bool ParsePointerToMemberType(State *state) {
// <template-param> ::= T_
// ::= T <parameter-2 non-negative number> _
+// ::= TL <level-1> __
+// ::= TL <level-1> _ <parameter-2 non-negative number> _
static bool ParseTemplateParam(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
if (ParseTwoCharToken(state, "T_")) {
MaybeAppend(state, "?"); // We don't support template substitutions.
- return true;
+ return true; // ::= T_
}
ParseState copy = state->parse_state;
if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
ParseOneCharToken(state, '_')) {
MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= T <parameter-2 non-negative number> _
+ }
+ state->parse_state = copy;  // rewind before trying the TL forms
+
+ if (ParseTwoCharToken(state, "TL") && ParseNumber(state, nullptr)) {  // shared TL <number> head of both remaining forms
+ if (ParseTwoCharToken(state, "__")) {
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= TL <level-1> __
+ }
+
+ if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_')) {
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= TL <level-1> _ <parameter-2 non-negative number> _
+ }
+ }
+ state->parse_state = copy;  // no alternative matched
+ return false;
+}
+
+// <template-param-decl>
+// ::= Ty # template type parameter
+// ::= Tk <concept name> [<template-args>] # constrained type parameter
+// ::= Tn <type> # template non-type parameter
+// ::= Tt <template-param-decl>* E # template template parameter
+// ::= Tp <template-param-decl> # template parameter pack
+//
+// NOTE: <concept name> is just a <name>: http://shortn/_MqJVyr0fc1
+// TODO(b/324066279): Implement optional suffix for `Tt`:
+// [Q <requires-clause expr>]
+static bool ParseTemplateParamDecl(State *state) {  // tries the five T? alternatives in turn, rewinding between attempts
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // snapshot restored after every failed alternative
+
+ if (ParseTwoCharToken(state, "Ty")) {  // Ty stands alone
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tk") && ParseName(state) &&
+ Optional(ParseTemplateArgs(state))) {  // Tk <name> with optional template args
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tn") && ParseType(state)) {  // Tn <type>
return true;
}
state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tt") &&
+ ZeroOrMore(ParseTemplateParamDecl, state) &&  // recursion: nested declarations up to the closing 'E'
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tp") && ParseTemplateParamDecl(state)) {  // Tp wraps exactly one nested declaration
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1441,13 +1930,14 @@ static bool ParseTemplateTemplateParam(State *state) {
ParseSubstitution(state, /*accept_std=*/false));
}
-// <template-args> ::= I <template-arg>+ E
+// <template-args> ::= I <template-arg>+ [Q <requires-clause expr>] E
static bool ParseTemplateArgs(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
DisableAppend(state);
if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
+ Optional(ParseQRequiresClauseExpr(state)) &&
ParseOneCharToken(state, 'E')) {
RestoreAppend(state, copy.append);
MaybeAppend(state, "<>");
@@ -1457,7 +1947,8 @@ static bool ParseTemplateArgs(State *state) {
return false;
}
-// <template-arg> ::= <type>
+// <template-arg> ::= <template-param-decl> <template-arg>
+// ::= <type>
// ::= <expr-primary>
// ::= J <template-arg>* E # argument pack
// ::= X <expression> E
@@ -1541,7 +2032,7 @@ static bool ParseTemplateArg(State *state) {
// ::= L <source-name> [<template-args>] [<expr-cast-value> E]
if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
copy = state->parse_state;
- if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
+ if (ParseExprCastValueAndTrailingE(state)) {
return true;
}
state->parse_state = copy;
@@ -1560,6 +2051,12 @@ static bool ParseTemplateArg(State *state) {
return true;
}
state->parse_state = copy;
+
+ if (ParseTemplateParamDecl(state) && ParseTemplateArg(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1614,6 +2111,13 @@ static bool ParseBaseUnresolvedName(State *state) {
// <base-unresolved-name>
// ::= [gs] sr <unresolved-qualifier-level>+ E
// <base-unresolved-name>
+// ::= sr St <simple-id> <simple-id> # nonstandard
+//
+// The last case is not part of the official grammar but has been observed in
+// real-world examples that the GNU demangler (but not the LLVM demangler) is
+// able to decode; see demangle_test.cc for one such symbol name. The shape
+// sr St <simple-id> <simple-id> was inferred by closed-box testing of the GNU
+// demangler.
static bool ParseUnresolvedName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -1633,7 +2137,7 @@ static bool ParseUnresolvedName(State *state) {
if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
ParseUnresolvedType(state) &&
- OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ OneOrMore(ParseUnresolvedQualifierLevel, state) &&
ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
return true;
}
@@ -1641,35 +2145,160 @@ static bool ParseUnresolvedName(State *state) {
if (Optional(ParseTwoCharToken(state, "gs")) &&
ParseTwoCharToken(state, "sr") &&
- OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ OneOrMore(ParseUnresolvedQualifierLevel, state) &&
ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
return true;
}
state->parse_state = copy;
+ if (ParseTwoCharToken(state, "sr") && ParseTwoCharToken(state, "St") &&
+ ParseSimpleId(state) && ParseSimpleId(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
+// <unresolved-qualifier-level> ::= <simple-id>
+// ::= <substitution> <template-args>
+//
+// The production <substitution> <template-args> is nonstandard but is observed
+// in practice. An upstream discussion on the best shape of <unresolved-name>
+// has not converged:
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/38
+static bool ParseUnresolvedQualifierLevel(State *state) {  // <simple-id>, else <substitution> <template-args>
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ if (ParseSimpleId(state)) return true;  // first alternative, tried before any snapshot is taken
+
+ ParseState copy = state->parse_state;  // snapshot for the two-part alternative below
+ if (ParseSubstitution(state, /*accept_std=*/false) &&
+ ParseTemplateArgs(state)) {  // template args are mandatory in this form
+ return true;
+ }
+ state->parse_state = copy;  // undo a substitution that was not followed by template args
+ return false;
+}
+
+// <union-selector> ::= _ [<number>]
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+static bool ParseUnionSelector(State *state) {  // note: no ComplexityGuard here, unlike the other parsers in this chunk
+ return ParseOneCharToken(state, '_') && Optional(ParseNumber(state, nullptr));  // underscore required, number optional
+}
+
+// <function-param> ::= fp <(top-level) CV-qualifiers> _
+// ::= fp <(top-level) CV-qualifiers> <number> _
+// ::= fL <number> p <(top-level) CV-qualifiers> _
+// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
+// ::= fpT # this
+static bool ParseFunctionParam(State *state) {  // tries fp..., then fL...p..., then the literal fpT
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;  // snapshot restored after each failed alternative
+
+ // Function-param expression (level 0).
+ if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {  // qualifiers and number optional, '_' required
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Function-param expression (level 1+).
+ if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&  // NOTE(review): grammar above lists <number> after fL as required; here it is optional
+ ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return ParseThreeCharToken(state, "fpT");  // reached only after both forms above were rejected and undone
+}
+
+// <braced-expression> ::= <expression>
+// ::= di <field source-name> <braced-expression>
+// ::= dx <index expression> <braced-expression>
+// ::= dX <expression> <expression> <braced-expression>
+static bool ParseBracedExpression(State *state) {  // tries the di, dx and dX forms, then a plain expression
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;  // snapshot restored after each failed alternative
+
+ if (ParseTwoCharToken(state, "di") && ParseSourceName(state) &&
+ ParseBracedExpression(state)) {  // recursion: the tail is itself a braced-expression
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "dx") && ParseExpression(state) &&
+ ParseBracedExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "dX") &&
+ ParseExpression(state) && ParseExpression(state) &&  // two expressions precede the tail in this form
+ ParseBracedExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return ParseExpression(state);  // fallback: an ordinary expression
+}
+
// <expression> ::= <1-ary operator-name> <expression>
// ::= <2-ary operator-name> <expression> <expression>
// ::= <3-ary operator-name> <expression> <expression> <expression>
+// ::= pp_ <expression> # ++e; pp <expression> is e++
+// ::= mm_ <expression> # --e; mm <expression> is e--
// ::= cl <expression>+ E
// ::= cp <simple-id> <expression>* E # Clang-specific.
+// ::= so <type> <expression> [<number>] <union-selector>* [p] E
// ::= cv <type> <expression> # type (expression)
// ::= cv <type> _ <expression>* E # type (expr-list)
+// ::= tl <type> <braced-expression>* E
+// ::= il <braced-expression>* E
+// ::= [gs] nw <expression>* _ <type> E
+// ::= [gs] nw <expression>* _ <type> <initializer>
+// ::= [gs] na <expression>* _ <type> E
+// ::= [gs] na <expression>* _ <type> <initializer>
+// ::= [gs] dl <expression>
+// ::= [gs] da <expression>
+// ::= dc <type> <expression>
+// ::= sc <type> <expression>
+// ::= cc <type> <expression>
+// ::= rc <type> <expression>
+// ::= ti <type>
+// ::= te <expression>
// ::= st <type>
+// ::= at <type>
+// ::= az <expression>
+// ::= nx <expression>
// ::= <template-param>
// ::= <function-param>
+// ::= sZ <template-param>
+// ::= sZ <function-param>
+// ::= sP <template-arg>* E
// ::= <expr-primary>
// ::= dt <expression> <unresolved-name> # expr.name
// ::= pt <expression> <unresolved-name> # expr->name
// ::= sp <expression> # argument pack expansion
+// ::= fl <binary operator-name> <expression>
+// ::= fr <binary operator-name> <expression>
+// ::= fL <binary operator-name> <expression> <expression>
+// ::= fR <binary operator-name> <expression> <expression>
+// ::= tw <expression>
+// ::= tr
// ::= sr <type> <unqualified-name> <template-args>
// ::= sr <type> <unqualified-name>
-// <function-param> ::= fp <(top-level) CV-qualifiers> _
-// ::= fp <(top-level) CV-qualifiers> <number> _
-// ::= fL <number> p <(top-level) CV-qualifiers> _
-// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
+// ::= u <source-name> <template-arg>* E # vendor extension
+// ::= rq <requirement>+ E
+// ::= rQ <bare-function-type> _ <requirement>+ E
static bool ParseExpression(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -1686,6 +2315,15 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // Preincrement and predecrement. Postincrement and postdecrement are handled
+ // by the operator-name logic later on.
+ if ((ParseThreeCharToken(state, "pp_") ||
+ ParseThreeCharToken(state, "mm_")) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// Clang-specific "cp <simple-id> <expression>* E"
// https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338
if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) &&
@@ -1694,17 +2332,65 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
- // Function-param expression (level 0).
- if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
- Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ // <expression> ::= so <type> <expression> [<number>] <union-selector>* [p] E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+ if (ParseTwoCharToken(state, "so") && ParseType(state) &&
+ ParseExpression(state) && Optional(ParseNumber(state, nullptr)) &&
+ ZeroOrMore(ParseUnionSelector, state) &&
+ Optional(ParseOneCharToken(state, 'p')) &&
+ ParseOneCharToken(state, 'E')) {
return true;
}
state->parse_state = copy;
- // Function-param expression (level 1+).
- if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
- ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
- Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ // <expression> ::= <function-param>
+ if (ParseFunctionParam(state)) return true;
+ state->parse_state = copy;
+
+ // <expression> ::= tl <type> <braced-expression>* E
+ if (ParseTwoCharToken(state, "tl") && ParseType(state) &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= il <braced-expression>* E
+ if (ParseTwoCharToken(state, "il") &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= [gs] nw <expression>* _ <type> E
+ // ::= [gs] nw <expression>* _ <type> <initializer>
+ // ::= [gs] na <expression>* _ <type> E
+ // ::= [gs] na <expression>* _ <type> <initializer>
+ if (Optional(ParseTwoCharToken(state, "gs")) &&
+ (ParseTwoCharToken(state, "nw") || ParseTwoCharToken(state, "na")) &&
+ ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, '_') &&
+ ParseType(state) &&
+ (ParseOneCharToken(state, 'E') || ParseInitializer(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= [gs] dl <expression>
+ // ::= [gs] da <expression>
+ if (Optional(ParseTwoCharToken(state, "gs")) &&
+ (ParseTwoCharToken(state, "dl") || ParseTwoCharToken(state, "da")) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // dynamic_cast, static_cast, const_cast, reinterpret_cast.
+ //
+ // <expression> ::= (dc | sc | cc | rc) <type> <expression>
+ if (ParseCharClass(state, "dscr") && ParseOneCharToken(state, 'c') &&
+ ParseType(state) && ParseExpression(state)) {
return true;
}
state->parse_state = copy;
@@ -1746,15 +2432,96 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // typeid(type)
+ if (ParseTwoCharToken(state, "ti") && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // typeid(expression)
+ if (ParseTwoCharToken(state, "te") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// sizeof type
if (ParseTwoCharToken(state, "st") && ParseType(state)) {
return true;
}
state->parse_state = copy;
+ // alignof(type)
+ if (ParseTwoCharToken(state, "at") && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // alignof(expression), a GNU extension
+ if (ParseTwoCharToken(state, "az") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // noexcept(expression) appearing as an expression in a dependent signature
+ if (ParseTwoCharToken(state, "nx") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // sizeof...(pack)
+ //
+ // <expression> ::= sZ <template-param>
+ // ::= sZ <function-param>
+ if (ParseTwoCharToken(state, "sZ") &&
+ (ParseFunctionParam(state) || ParseTemplateParam(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // sizeof...(pack) captured from an alias template
+ //
+ // <expression> ::= sP <template-arg>* E
+ if (ParseTwoCharToken(state, "sP") && ZeroOrMore(ParseTemplateArg, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Unary folds (... op pack) and (pack op ...).
+ //
+ // <expression> ::= fl <binary operator-name> <expression>
+ // ::= fr <binary operator-name> <expression>
+ if ((ParseTwoCharToken(state, "fl") || ParseTwoCharToken(state, "fr")) &&
+ ParseOperatorName(state, nullptr) && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Binary folds (init op ... op pack) and (pack op ... op init).
+ //
+ // <expression> ::= fL <binary operator-name> <expression> <expression>
+ // ::= fR <binary operator-name> <expression> <expression>
+ if ((ParseTwoCharToken(state, "fL") || ParseTwoCharToken(state, "fR")) &&
+ ParseOperatorName(state, nullptr) && ParseExpression(state) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // tw <expression>: throw e
+ if (ParseTwoCharToken(state, "tw") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // tr: throw (rethrows an exception from the handler that caught it)
+ if (ParseTwoCharToken(state, "tr")) return true;
+
// Object and pointer member access expressions.
+ //
+ // <expression> ::= (dt | pt) <expression> <unresolved-name>
if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
- ParseExpression(state) && ParseType(state)) {
+ ParseExpression(state) && ParseUnresolvedName(state)) {
return true;
}
state->parse_state = copy;
@@ -1774,9 +2541,61 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // Vendor extended expressions
+ if (ParseOneCharToken(state, 'u') && ParseSourceName(state) &&
+ ZeroOrMore(ParseTemplateArg, state) && ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= rq <requirement>+ E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+ if (ParseTwoCharToken(state, "rq") && OneOrMore(ParseRequirement, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= rQ <bare-function-type> _ <requirement>+ E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+ if (ParseTwoCharToken(state, "rQ") && ParseBareFunctionType(state) &&
+ ParseOneCharToken(state, '_') && OneOrMore(ParseRequirement, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
return ParseUnresolvedName(state);
}
+// <initializer> ::= pi <expression>* E
+// ::= il <braced-expression>* E
+//
+// The il ... E form is not in the ABI spec but is seen in practice for
+// braced-init-lists in new-expressions, which are standard syntax from C++11
+// on.
+static bool ParseInitializer(State *state) {  // accepts pi <expression>* E or il <braced-expression>* E
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // snapshot restored after each failed alternative
+
+ if (ParseTwoCharToken(state, "pi") && ZeroOrMore(ParseExpression, state) &&
+ ParseOneCharToken(state, 'E')) {  // closing 'E' is required even with zero expressions
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "il") &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;  // neither form matched
+ return false;
+}
+
// <expr-primary> ::= L <type> <(value) number> E
// ::= L <type> <(value) float> E
// ::= L <mangled-name> E
@@ -1819,10 +2638,35 @@ static bool ParseExprPrimary(State *state) {
return false;
}
- // The merged cast production.
- if (ParseOneCharToken(state, 'L') && ParseType(state) &&
- ParseExprCastValue(state)) {
- return true;
+ if (ParseOneCharToken(state, 'L')) {
+ // There are two special cases in which a literal may or must contain a type
+ // without a value. The first is that both LDnE and LDn0E are valid
+ // encodings of nullptr, used in different situations. Recognize LDnE here,
+ // leaving LDn0E to be recognized by the general logic afterward.
+ if (ParseThreeCharToken(state, "DnE")) return true;
+
+ // The second special case is a string literal, currently mangled in C++98
+ // style as LA<length + 1>_KcE. This is inadequate to support C++11 and
+ // later versions, and the discussion of this problem has not converged.
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/64
+ //
+ // For now the bare-type mangling is what's used in practice, so we
+ // recognize this form and only this form if an array type appears here.
+ // Someday we'll probably have to accept a new form of value mangling in
+ // LA...E constructs. (Note also that C++20 allows a wide range of
+ // class-type objects as template arguments, so someday their values will be
+ // mangled and we'll have to recognize them here too.)
+ if (RemainingInput(state)[0] == 'A' /* an array type follows */) {
+ if (ParseType(state) && ParseOneCharToken(state, 'E')) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ // The merged cast production.
+ if (ParseType(state) && ParseExprCastValueAndTrailingE(state)) {
+ return true;
+ }
}
state->parse_state = copy;
@@ -1836,7 +2680,7 @@ static bool ParseExprPrimary(State *state) {
}
// <number> or <float>, followed by 'E', as described above ParseExprPrimary.
-static bool ParseExprCastValue(State *state) {
+static bool ParseExprCastValueAndTrailingE(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
// We have to be able to backtrack after accepting a number because we could
@@ -1848,39 +2692,148 @@ static bool ParseExprCastValue(State *state) {
}
state->parse_state = copy;
- if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
+ if (ParseFloatNumber(state)) {
+ // <float> for ordinary floating-point types
+ if (ParseOneCharToken(state, 'E')) return true;
+
+ // <float> _ <float> for complex floating-point types
+ if (ParseOneCharToken(state, '_') && ParseFloatNumber(state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ }
+ state->parse_state = copy;
+
+ return false;
+}
+
+// Parses `Q <requires-clause expr>`, backtracking `state` on failure.
+//
+// Two symbols are handled in one function for convenience: in LLVM's Itanium
+// ABI mangling grammar, <requires-clause expr> only ever appears after Q.
+//
+// To keep the implementation simple, the parsed `requires` clause is not
+// emitted.  Consequently the mangled names of these two functions demangle
+// identically:
+//
+// template <typename T>
+// int foo(T) requires IsIntegral<T>;
+//
+// vs.
+//
+// template <typename T>
+// int foo(T);
+static bool ParseQRequiresClauseExpr(State *state) {
+  ComplexityGuard guard(state);
+  if (guard.IsTooComplex()) return false;
+  ParseState saved = state->parse_state;
+  DisableAppend(state);
+
+  // <requires-clause expr> is just an <expression>: http://shortn/_9E1Ul0rIM8
+  const bool parsed =
+      ParseOneCharToken(state, 'Q') && ParseExpression(state);
+  if (!parsed) {
+    // Restoring the saved parse state also restores append.
+    state->parse_state = saved;
+    return false;
+  }
+
+  RestoreAppend(state, saved.append);
+  return true;
+}
+
+// <requirement> ::= X <expression> [N] [R <type-constraint>]
+// <requirement> ::= T <type>
+// <requirement> ::= Q <constraint-expression>
+//
+// <constraint-expression> ::= <expression>
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+static bool ParseRequirement(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;  // restored before each further alternative is tried
+
+ if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
+ Optional(ParseOneCharToken(state, 'N')) &&
+ // This logic backtracks cleanly if we eat an R but a valid type doesn't
+ // follow it.
+ (!ParseOneCharToken(state, 'R') || ParseTypeConstraint(state))) {
return true;
}
state->parse_state = copy;
+ if (ParseOneCharToken(state, 'T') && ParseType(state)) return true;  // T <type>
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) return true;  // Q <constraint-expression>
+ state->parse_state = copy;
+
return false;
}
+// <type-constraint> ::= <name>
+//
+// A type constraint has no syntax of its own in this parser; it succeeds
+// exactly when a <name> can be parsed at the current position.
+static bool ParseTypeConstraint(State *state) {
+  const bool parsed_name = ParseName(state);
+  return parsed_name;
+}
+
// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
// ::= Z <(function) encoding> E s [<discriminator>]
+// ::= Z <(function) encoding> E d [<(parameter) number>] _ <name>
//
// Parsing a common prefix of these two productions together avoids an
// exponential blowup of backtracking. Parse like:
// <local-name> := Z <encoding> E <local-name-suffix>
// <local-name-suffix> ::= s [<discriminator>]
+// ::= d [<(parameter) number>] _ <name>
// ::= <name> [<discriminator>]
static bool ParseLocalNameSuffix(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // snapshot used to roll back each failed alternative
+
+ // <local-name-suffix> ::= d [<(parameter) number>] _ <name>
+ if (ParseOneCharToken(state, 'd') &&
+ (IsDigit(RemainingInput(state)[0]) || RemainingInput(state)[0] == '_')) {
+ int number = -1;  // stays -1 when the optional number is absent
+ Optional(ParseNumber(state, &number));
+ if (number < -1 || number > 2147483645) {
+ // Work around overflow cases. We do not expect these outside of a fuzzer
+ // or other source of adversarial input. If we do detect overflow here,
+ // we'll print {default arg#1}.
+ number = -1;
+ }
+ number += 2;  // absent number prints as arg#1, 0 as arg#2, and so on
+
+ // The ::{default arg#1}:: infix must be rendered before the lambda itself,
+ // so print this before parsing the rest of the <local-name-suffix>.
+ MaybeAppend(state, "::{default arg#");
+ MaybeAppendDecimal(state, number);
+ MaybeAppend(state, "}::");
+ if (ParseOneCharToken(state, '_') && ParseName(state)) return true;
+
+ // On late parse failure, roll back not only the input but also the output,
+ // whose trailing NUL was overwritten.
+ state->parse_state = copy;
+ if (state->parse_state.append) {
+ state->out[state->parse_state.out_cur_idx] = '\0';
+ }
+ return false;
+ }
+ state->parse_state = copy;
+ // <local-name-suffix> ::= <name> [<discriminator>]
if (MaybeAppend(state, "::") && ParseName(state) &&
Optional(ParseDiscriminator(state))) {
return true;
}
-
- // Since we're not going to overwrite the above "::" by re-parsing the
- // <encoding> (whose trailing '\0' byte was in the byte now holding the
- // first ':'), we have to rollback the "::" if the <name> parse failed.
+ state->parse_state = copy;  // undo the "::" bookkeeping along with any consumed input
if (state->parse_state.append) {
- state->out[state->parse_state.out_cur_idx - 2] = '\0';
+ state->out[state->parse_state.out_cur_idx] = '\0';  // re-terminate the output at the restored index
}
+ // <local-name-suffix> ::= s [<discriminator>]
return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
}
@@ -1896,12 +2849,22 @@ static bool ParseLocalName(State *state) {
return false;
}
-// <discriminator> := _ <(non-negative) number>
+// <discriminator> := _ <digit>
+// := __ <number (>= 10)> _
static bool ParseDiscriminator(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
+
+ // Both forms start with _ so parse that first.
+ if (!ParseOneCharToken(state, '_')) return false;
+
+ // <digit>
+ if (ParseDigit(state, nullptr)) return true;
+
+ // _ <number> _
+ if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_')) {
return true;
}
state->parse_state = copy;
@@ -1947,6 +2910,7 @@ static bool ParseSubstitution(State *state, bool accept_std) {
MaybeAppend(state, p->real_name);
}
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
}
@@ -1972,10 +2936,13 @@ static bool ParseTopLevelMangledName(State *state) {
MaybeAppend(state, RemainingInput(state));
return true;
}
+ ReportHighWaterMark(state);
return false; // Unconsumed suffix.
}
return true;
}
+
+ ReportHighWaterMark(state);
return false;
}
@@ -1985,6 +2952,10 @@ static bool Overflowed(const State *state) {
// The demangler entry point.
bool Demangle(const char* mangled, char* out, size_t out_size) {
+ if (mangled[0] == '_' && mangled[1] == 'R') {
+ return DemangleRustSymbolEncoding(mangled, out, out_size);
+ }
+
State state;
InitState(&state, mangled, out, out_size);
return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&
diff --git a/absl/debugging/internal/demangle.h b/absl/debugging/internal/demangle.h
index 146d1150..cb0aba13 100644
--- a/absl/debugging/internal/demangle.h
+++ b/absl/debugging/internal/demangle.h
@@ -56,6 +56,9 @@ namespace debugging_internal {
//
// See the unit test for more examples.
//
+// Demangle also recognizes Rust mangled names by delegating the parsing of
+// anything that starts with _R to DemangleRustSymbolEncoding (demangle_rust.h).
+//
// Note: we might want to write demanglers for ABIs other than Itanium
// C++ ABI in the future.
bool Demangle(const char* mangled, char* out, size_t out_size);
diff --git a/absl/debugging/internal/demangle_rust.cc b/absl/debugging/internal/demangle_rust.cc
new file mode 100644
index 00000000..4309bd84
--- /dev/null
+++ b/absl/debugging/internal/demangle_rust.cc
@@ -0,0 +1,925 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/demangle_rust.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/debugging/internal/decode_rust_punycode.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+namespace {
+
+// Same step limit as the C++ demangler in demangle.cc uses.
+constexpr int kMaxReturns = 1 << 17;  // bounds the completed simulated returns counted in RustSymbolParser::Parse
+
+// Character classifiers over plain chars.  Each one compares against literal
+// character ranges, so no locale is consulted.
+bool IsDigit(char c) { return c >= '0' && c <= '9'; }
+bool IsLower(char c) { return c >= 'a' && c <= 'z'; }
+bool IsUpper(char c) { return c >= 'A' && c <= 'Z'; }
+bool IsAlpha(char c) { return IsUpper(c) || IsLower(c); }
+bool IsIdentifierChar(char c) { return c == '_' || IsDigit(c) || IsAlpha(c); }
+bool IsLowerHexDigit(char c) { return (c >= 'a' && c <= 'f') || IsDigit(c); }
+
+// Maps a basic-type code letter from the mangling to the Rust type name that
+// is printed for it.  Returns nullptr for any character that is not one of
+// the recognized lowercase codes.
+const char* BasicTypeName(char c) {
+  // One slot per lowercase letter, indexed by c - 'a'.  nullptr marks the
+  // letters that do not name a basic type.
+  static constexpr const char* kNamesByLetter[26] = {
+      "i8",     // a
+      "bool",   // b
+      "char",   // c
+      "f64",    // d
+      "str",    // e
+      "f32",    // f
+      nullptr,  // g
+      "u8",     // h
+      "isize",  // i
+      "usize",  // j
+      nullptr,  // k
+      "i32",    // l
+      "u32",    // m
+      "i128",   // n
+      "u128",   // o
+      "_",      // p
+      nullptr,  // q
+      nullptr,  // r
+      "i16",    // s
+      "u16",    // t
+      "()",     // u
+      "...",    // v
+      nullptr,  // w
+      "i64",    // x
+      "u64",    // y
+      "!",      // z
+  };
+  if (c < 'a' || c > 'z') return nullptr;
+  return kNamesByLetter[c - 'a'];
+}
+
+// Parser for Rust symbol mangling v0, whose grammar is defined here:
+//
+// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
+class RustSymbolParser {
+ public:
+  // Sets up demangling of encoding, a Rust symbol name beginning with _R,
+  // into the output range [out, out_end).  If that range is nonempty, it is
+  // initialized to hold the empty string.  Call Parse on the new object to do
+  // the actual work.
+  RustSymbolParser(const char* encoding, char* out, char* const out_end)
+      : encoding_(encoding), out_(out), out_end_(out_end) {
+    const bool has_room_for_nul = (out_ != out_end_);
+    if (has_room_for_nul) out_[0] = '\0';
+  }
+
+ // Parses the constructor's encoding argument, writing output into the range
+ // [out, out_end). Returns true on success and false for input whose
+ // structure was not recognized or exceeded implementation limits, such as by
+ // nesting structures too deep. In either case *this should not be used
+ // again.
+ ABSL_MUST_USE_RESULT bool Parse() && {
+ // Recursively parses the grammar production named by callee, then resumes
+ // execution at the next statement.
+ //
+ // Recursive-descent parsing is a beautifully readable translation of a
+ // grammar, but it risks stack overflow if implemented by naive recursion on
+ // the C++ call stack. So we simulate recursion by goto and switch instead,
+ // keeping a bounded stack of "return addresses" in the recursion_stack_
+ // member.
+ //
+ // The callee argument is a statement label. We goto that label after
+ // saving the "return address" on recursion_stack_. The next continue
+ // statement in the for loop below "returns" from this "call".
+ //
+ // The caller argument names the return point. Each value of caller must
+ // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the
+ // definition of enum ReturnAddress. The switch implements the control
+ // transfer from the end of a "called" subroutine back to the statement
+ // after the "call".
+ //
+ // Note that not all the grammar productions have to be packed into the
+ // switch, but only those which appear in a cycle in the grammar. Anything
+ // acyclic can be written as ordinary functions and function calls, e.g.,
+ // ParseIdentifier.
+#define ABSL_DEMANGLER_RECURSE(callee, caller) \
+ do { \
+ if (recursion_depth_ == kStackSize) return false; \
+ /* The next continue will switch on this saved value ... */ \
+ recursion_stack_[recursion_depth_++] = caller; \
+ goto callee; \
+ /* ... and will land here, resuming the suspended code. */ \
+ case caller: {} \
+ } while (0)
+
+ // Parse the encoding, counting completed recursive calls to guard against
+ // excessively complex input and infinite-loop bugs.
+ int iter = 0;
+ goto whole_encoding;  // jump straight into the loop body; the loop condition is first tested after a continue
+ for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) {
+ // This switch resumes the code path most recently suspended by
+ // ABSL_DEMANGLER_RECURSE.
+ switch (recursion_stack_[--recursion_depth_]) {
+ //
+ // symbol-name ->
+ // _R decimal-number? path instantiating-crate? vendor-specific-suffix?
+ whole_encoding:
+ if (!Eat('_') || !Eat('R')) return false;
+ // decimal-number? is always empty today, so proceed to path, which
+ // can't start with a decimal digit.
+ ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
+ if (IsAlpha(Peek())) {
+ ++silence_depth_; // Print nothing more from here on.
+ ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
+ }
+ switch (Take()) {  // only end of input, '.', or '$' may follow the parsed paths
+ case '.': case '$': case '\0': return true;
+ }
+ return false; // unexpected trailing content
+
+ // path -> crate-root | inherent-impl | trait-impl | trait-definition |
+ // nested-path | generic-args | backref
+ //
+ // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch
+ // (which would hide the generated case label). Thus we jump out of the
+ // inner switch with gotos before performing any fake recursion.
+ path:
+ switch (Take()) {
+ case 'C': goto crate_root;
+ case 'M': goto inherent_impl;
+ case 'X': goto trait_impl;
+ case 'Y': goto trait_definition;
+ case 'N': goto nested_path;
+ case 'I': goto generic_args;
+ case 'B': goto path_backref;
+ default: return false;
+ }
+
+ // crate-root -> C identifier (C consumed above)
+ crate_root:
+ if (!ParseIdentifier()) return false;
+ continue;
+
+ // inherent-impl -> M impl-path type (M already consumed)
+ inherent_impl:
+ if (!Emit("<")) return false;
+ ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType);
+ ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding);
+ if (!Emit(">")) return false;
+ continue;
+
+ // trait-impl -> X impl-path type path (X already consumed)
+ trait_impl:
+ if (!Emit("<")) return false;
+ ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType);
+ ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix);
+ if (!Emit(" as ")) return false;
+ ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding);
+ if (!Emit(">")) return false;
+ continue;
+
+ // impl-path -> disambiguator? path (but never print it!)
+ impl_path:
+ ++silence_depth_;  // suppress output until the matching decrement below
+ {
+ int ignored_disambiguator;
+ if (!ParseDisambiguator(ignored_disambiguator)) return false;
+ }
+ ABSL_DEMANGLER_RECURSE(path, kImplPathEnding);
+ --silence_depth_;
+ continue;
+
+ // trait-definition -> Y type path (Y already consumed)
+ trait_definition:
+ if (!Emit("<")) return false;
+ ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix);
+ if (!Emit(" as ")) return false;
+ ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding);
+ if (!Emit(">")) return false;
+ continue;
+
+ // nested-path -> N namespace path identifier (N already consumed)
+ // namespace -> lower | upper
+ nested_path:
+ // Uppercase namespaces must be saved on a stack so we can print
+ // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
+ if (IsUpper(Peek())) {
+ if (!PushNamespace(Take())) return false;
+ ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
+ if (!Emit("::")) return false;
+ if (!ParseIdentifier(PopNamespace())) return false;
+ continue;
+ }
+
+ // Lowercase namespaces, however, are never represented in the output;
+ // they all emit just ::name.
+ if (IsLower(Take())) {
+ ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
+ if (!Emit("::")) return false;
+ if (!ParseIdentifier()) return false;
+ continue;
+ }
+
+ // Neither upper nor lower
+ return false;
+
+ // type -> basic-type | array-type | slice-type | tuple-type |
+ // ref-type | mut-ref-type | const-ptr-type | mut-ptr-type |
+ // fn-type | dyn-trait-type | path | backref
+ //
+ // We use ifs instead of switch (Take()) because the default case jumps
+ // to path, which will need to see the first character not yet Taken
+ // from the input. Because we do not use a nested switch here,
+ // ABSL_DEMANGLER_RECURSE works fine in the 'S' case.
+ type:
+ if (IsLower(Peek())) {
+ const char* type_name = BasicTypeName(Take());
+ if (type_name == nullptr || !Emit(type_name)) return false;
+ continue;
+ }
+ if (Eat('A')) {
+ // array-type = A type const
+ if (!Emit("[")) return false;
+ ABSL_DEMANGLER_RECURSE(type, kArraySize);
+ if (!Emit("; ")) return false;
+ ABSL_DEMANGLER_RECURSE(constant, kFinishArray);
+ if (!Emit("]")) return false;
+ continue;
+ }
+ if (Eat('S')) {
+ if (!Emit("[")) return false;
+ ABSL_DEMANGLER_RECURSE(type, kSliceEnding);
+ if (!Emit("]")) return false;
+ continue;
+ }
+ if (Eat('T')) goto tuple_type;
+ if (Eat('R')) {
+ if (!Emit("&")) return false;
+ if (!ParseOptionalLifetime()) return false;
+ goto type;  // nothing is emitted after the referent, so no return address is pushed
+ }
+ if (Eat('Q')) {
+ if (!Emit("&mut ")) return false;
+ if (!ParseOptionalLifetime()) return false;
+ goto type;
+ }
+ if (Eat('P')) {
+ if (!Emit("*const ")) return false;
+ goto type;
+ }
+ if (Eat('O')) {
+ if (!Emit("*mut ")) return false;
+ goto type;
+ }
+ if (Eat('F')) goto fn_type;
+ if (Eat('D')) goto dyn_trait_type;
+ if (Eat('B')) goto type_backref;
+ goto path;
+
+ // tuple-type -> T type* E (T already consumed)
+ tuple_type:
+ if (!Emit("(")) return false;
+
+ // The toolchain should call the unit type u instead of TE, but the
+ // grammar and other demanglers also recognize TE, so we do too.
+ if (Eat('E')) {
+ if (!Emit(")")) return false;
+ continue;
+ }
+
+ // A tuple with one element is rendered (type,) instead of (type).
+ ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement);
+ if (Eat('E')) {
+ if (!Emit(",)")) return false;
+ continue;
+ }
+
+ // A tuple with two elements is of course (x, y).
+ if (!Emit(", ")) return false;
+ ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement);
+ if (Eat('E')) {
+ if (!Emit(")")) return false;
+ continue;
+ }
+
+ // And (x, y, z) for three elements.
+ if (!Emit(", ")) return false;
+ ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement);
+ if (Eat('E')) {
+ if (!Emit(")")) return false;
+ continue;
+ }
+
+ // For longer tuples we write (x, y, z, ...), printing none of the
+ // content of the fourth and later types. Thus we avoid exhausting
+ // output buffers and human readers' patience when some library has a
+ // long tuple as an implementation detail, without having to
+ // completely obfuscate all tuples.
+ if (!Emit(", ...)")) return false;
+ ++silence_depth_;
+ while (!Eat('E')) {
+ ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement);
+ }
+ --silence_depth_;
+ continue;
+
+ // fn-type -> F fn-sig (F already consumed)
+ // fn-sig -> binder? U? (K abi)? type* E type
+ // abi -> C | undisambiguated-identifier
+ //
+ // We follow the C++ demangler in suppressing details of function
+ // signatures. Every function type is rendered "fn...".
+ fn_type:
+ if (!Emit("fn...")) return false;
+ ++silence_depth_;
+ if (!ParseOptionalBinder()) return false;
+ (void)Eat('U');  // optional U; parsing continues the same way whether or not it was present
+ if (Eat('K')) {
+ if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false;  // abi -> C | undisambiguated-identifier
+ }
+ while (!Eat('E')) {
+ ABSL_DEMANGLER_RECURSE(type, kContinueParameterList);
+ }
+ ABSL_DEMANGLER_RECURSE(type, kFinishFn);
+ --silence_depth_;
+ continue;
+
+ // dyn-trait-type -> D dyn-bounds lifetime (D already consumed)
+ // dyn-bounds -> binder? dyn-trait* E
+ //
+ // The grammar strangely allows an empty trait list, even though the
+ // compiler should never output one. We follow existing demanglers in
+ // rendering DEL_ as "dyn ".
+ //
+ // Because auto traits lengthen a type name considerably without
+ // providing much value to a search for related source code, it would be
+ // desirable to abbreviate
+ // dyn main::Trait + std::marker::Copy + std::marker::Send
+ // to
+ // dyn main::Trait + ...,
+ // eliding the auto traits. But it is difficult to do so correctly, in
+ // part because there is no guarantee that the mangling will list the
+ // main trait first. So we just print all the traits in their order of
+ // appearance in the mangled name.
+ dyn_trait_type:
+ if (!Emit("dyn ")) return false;
+ if (!ParseOptionalBinder()) return false;
+ if (!Eat('E')) {
+ ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits);
+ while (!Eat('E')) {
+ if (!Emit(" + ")) return false;
+ ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits);
+ }
+ }
+ if (!ParseRequiredLifetime()) return false;
+ continue;
+
+ // dyn-trait -> path dyn-trait-assoc-binding*
+ // dyn-trait-assoc-binding -> p undisambiguated-identifier type
+ //
+ // We render nonempty binding lists as <>, omitting their contents as
+ // for generic-args.
+ dyn_trait:
+ ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait);
+ if (Peek() == 'p') {
+ if (!Emit("<>")) return false;
+ ++silence_depth_;
+ while (Eat('p')) {
+ if (!ParseUndisambiguatedIdentifier()) return false;
+ ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding);
+ }
+ --silence_depth_;
+ }
+ continue;
+
+ // const -> type const-data | p | backref
+ //
+ // const is a C++ keyword, so we use the label `constant` instead.
+ constant:
+ if (Eat('B')) goto const_backref;
+ if (Eat('p')) {
+ if (!Emit("_")) return false;
+ continue;
+ }
+
+ // Scan the type without printing it.
+ //
+ // The Rust language restricts the type of a const generic argument
+ // much more than the mangling grammar does. We do not enforce this.
+ //
+ // We also do not bother printing false, true, 'A', and '\u{abcd}' for
+ // the types bool and char. Because we do not print generic-args
+ // contents, we expect to print constants only in array sizes, and
+ // those should not be bool or char.
+ ++silence_depth_;
+ ABSL_DEMANGLER_RECURSE(type, kConstData);
+ --silence_depth_;
+
+ // const-data -> n? hex-digit* _
+ //
+ // Although the grammar doesn't say this, existing demanglers expect
+ // that zero is 0, not an empty digit sequence, and no nonzero value
+ // may have leading zero digits. Also n0_ is accepted and printed as
+ // -0, though a toolchain will probably never write that encoding.
+ if (Eat('n') && !EmitChar('-')) return false;
+ if (!Emit("0x")) return false;
+ if (Eat('0')) {
+ if (!EmitChar('0')) return false;
+ if (!Eat('_')) return false;
+ continue;
+ }
+ while (IsLowerHexDigit(Peek())) {
+ if (!EmitChar(Take())) return false;
+ }
+ if (!Eat('_')) return false;
+ continue;
+
+ // generic-args -> I path generic-arg* E (I already consumed)
+ //
+ // We follow the C++ demangler in omitting all the arguments from the
+ // output, printing only the list opening and closing tokens.
+ generic_args:
+ ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList);
+ if (!Emit("::<>")) return false;
+ ++silence_depth_;
+ while (!Eat('E')) {
+ ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList);
+ }
+ --silence_depth_;
+ continue;
+
+ // generic-arg -> lifetime | type | K const
+ generic_arg:
+ if (Peek() == 'L') {
+ if (!ParseOptionalLifetime()) return false;
+ continue;
+ }
+ if (Eat('K')) goto constant;
+ goto type;
+
+ // backref -> B base-62-number (B already consumed)
+ //
+ // The BeginBackref call parses and range-checks the base-62-number. We
+ // always do that much.
+ //
+ // The recursive call parses and prints what the backref points at. We
+ // save CPU and stack by skipping this work if the output would be
+ // suppressed anyway.
+ path_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
+ }
+ EndBackref();
+ continue;
+
+ // This represents the same backref production as in path_backref but
+ // parses the target as a type instead of a path.
+ type_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
+ }
+ EndBackref();
+ continue;
+
+ const_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding);
+ }
+ EndBackref();
+ continue;
+ }
+ }
+
+ return false; // hit iteration limit or a bug in our stack handling
+ }
+
+ private:
+ // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls.
+ enum ReturnAddress : uint8_t {  // each enumerator is pushed on recursion_stack_ and serves as a case label in Parse
+ kInstantiatingCrate,
+ kVendorSpecificSuffix,
+ kIdentifierInUppercaseNamespace,
+ kIdentifierInLowercaseNamespace,
+ kInherentImplType,
+ kInherentImplEnding,
+ kTraitImplType,
+ kTraitImplInfix,
+ kTraitImplEnding,
+ kImplPathEnding,
+ kTraitDefinitionInfix,
+ kTraitDefinitionEnding,
+ kArraySize,
+ kFinishArray,
+ kSliceEnding,
+ kAfterFirstTupleElement,
+ kAfterSecondTupleElement,
+ kAfterThirdTupleElement,
+ kAfterSubsequentTupleElement,
+ kContinueParameterList,
+ kFinishFn,
+ kBeginAutoTraits,
+ kContinueAutoTraits,
+ kContinueDynTrait,
+ kContinueAssocBinding,
+ kConstData,
+ kBeginGenericArgList,
+ kContinueGenericArgList,
+ kPathBackrefEnding,
+ kTypeBackrefEnding,
+ kConstantBackrefEnding,
+ };
+
+ // Element counts for the stack arrays. Larger stack sizes accommodate more
+ // deeply nested names at the cost of a larger footprint on the C++ call
+ // stack.
+ enum {
+ // Maximum recursive calls outstanding at one time.
+ kStackSize = 256,  // ABSL_DEMANGLER_RECURSE makes Parse return false once recursion_depth_ reaches this
+
+ // Maximum N<uppercase> nested-paths open at once. We do not expect
+ // closures inside closures inside closures as much as functions inside
+ // modules inside other modules, so we can use a smaller array here.
+ kNamespaceStackSize = 64,
+
+ // Maximum number of nested backrefs. We can keep this stack pretty small
+ // because we do not follow backrefs inside generic-args or other contexts
+ // that suppress printing, so deep stacking is unlikely in practice.
+ kPositionStackSize = 16,
+ };
+
+  // Looks at the character at the current input position and leaves the
+  // position unchanged.
+  char Peek() const { return *(encoding_ + pos_); }
+
+  // Returns the character at the current input position and advances past
+  // it.
+  char Take() {
+    const char current = encoding_[pos_];
+    ++pos_;
+    return current;
+  }
+
+  // Consumes the next input character and returns true if it equals want.
+  // Otherwise consumes nothing and returns false.
+  ABSL_MUST_USE_RESULT bool Eat(char want) {
+    if (encoding_[pos_] == want) {
+      ++pos_;
+      return true;
+    }
+    return false;
+  }
+
+  // Appends c to the output and re-terminates the output with NUL, returning
+  // true.  Returns false, writing nothing, if fewer than two bytes of output
+  // space remain.  While output is silenced this is a no-op that returns
+  // true.
+  ABSL_MUST_USE_RESULT bool EmitChar(char c) {
+    const bool silenced = silence_depth_ > 0;
+    if (silenced) return true;
+
+    // Room is needed for both c and the NUL that follows it.
+    const auto bytes_free = out_end_ - out_;
+    if (bytes_free < 2) return false;
+
+    out_[0] = c;
+    out_[1] = '\0';
+    ++out_;
+    return true;
+  }
+
+  // Appends the C string token, including its terminating NUL, to the output
+  // and returns true.  Returns false, writing nothing, if token plus its NUL
+  // does not fit in the remaining output space.  While output is silenced
+  // this is a no-op that returns true.
+  ABSL_MUST_USE_RESULT bool Emit(const char* token) {
+    if (silence_depth_ > 0) return true;
+
+    const size_t length = std::strlen(token);
+    const size_t space_left = static_cast<size_t>(out_end_ - out_);
+    if (space_left < length + 1) return false;  // + 1 for the final NUL
+
+    // Copy the NUL too, but leave out_ pointing at it so that the next
+    // append overwrites it.
+    std::memcpy(out_, token, length + 1);
+    out_ += length;
+    return true;
+  }
+
+  // Appends disambiguator to the output in decimal, or "?" if it is negative,
+  // followed by a NUL character, and returns true.  Returns false if the text
+  // did not fit into the output buffer.
+  ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) {
+    if (disambiguator < 0) return EmitChar('?');  // parsed but too large
+    if (disambiguator == 0) return EmitChar('0');
+
+    // Build the decimal text from the least significant digit backward.
+    // Three digits per byte is enough because 999 > 256, and the bound stays
+    // correct even if the type of the disambiguator variable changes later.
+    // The zero-initialized last element serves as the NUL terminator.
+    char digits[3 * sizeof(disambiguator)] = {};
+    char* first_digit = digits + sizeof(digits) - 1;
+    while (disambiguator > 0) {
+      *--first_digit = static_cast<char>('0' + disambiguator % 10);
+      disambiguator /= 10;
+    }
+    return Emit(first_digit);
+  }
+
+  // Consumes an optional disambiguator (s123_) from the input.
+  //
+  // Returns true on success.  value is then 0 if the disambiguator was
+  // omitted, the encoded value if it was present and not too big, or -1 if
+  // it was too big.  Returns false and sets value to -1 on parse failure.
+  ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) {
+    // disambiguator = s base-62-number
+    if (Eat('s')) {
+      int number = 0;
+      const bool parsed = ParseBase62Number(number);
+      // A negative number means "parsed but too big" and is passed on as -1.
+      value = (parsed && number >= 0) ? number + 1 : -1;
+      return parsed;
+    }
+
+    // Disambiguators are optional.  An omitted disambiguator is zero.
+    value = 0;
+    return true;
+  }
+
+  // Consumes a base-62 number like _ or 123_ from the input.
+  //
+  // Returns true on success, with value set to the encoded value, or to -1 if
+  // that value was too big to compute.  Returns false and sets value to -1 on
+  // parse failure.
+  ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) {
+    value = -1;
+
+    // base-62-number = (digit | lower | upper)* _
+    //
+    // An empty base-62 digit sequence means 0.
+    if (Eat('_')) {
+      value = 0;
+      return true;
+    }
+
+    // A nonempty digit sequence denotes its base-62 value plus 1.
+    constexpr int kOverflowThreshold = std::numeric_limits<int>::max() / 62;
+    int accumulated = 0;
+    bool too_big = false;
+    for (char c = Peek(); IsAlpha(c) || IsDigit(c); c = Peek()) {
+      (void)Take();
+      if (accumulated >= kOverflowThreshold) {
+        // Close to overflowing an int: keep consuming digits but stop
+        // updating the total, and report -1 at the end.  This avoids
+        // undefined behavior on crate-root disambiguators, which are large
+        // in practice but not shown in demangling, while still computing
+        // closure and shim disambiguators, which are typically small and are
+        // printed out.
+        too_big = true;
+        continue;
+      }
+      const int digit = IsDigit(c)   ? c - '0'
+                        : IsLower(c) ? c - 'a' + 10
+                                     : c - 'A' + 36;
+      accumulated = 62 * accumulated + digit;
+    }
+
+    if (!Eat('_')) return false;
+    if (!too_big) value = accumulated + 1;
+    return true;
+  }
+
+ // Parses an identifier production, returning true on success.
+ //
+ // uppercase_namespace, when nonzero, is the character that followed the N of
+ // the enclosing nested-identifier (e.g., 'C' for a closure).  It is passed
+ // through so the name can be written with that namespace's conventional
+ // decoration.
+ ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') {
+   // identifier -> disambiguator? undisambiguated-identifier
+   int disambiguator = 0;
+   return ParseDisambiguator(disambiguator) &&
+          ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator);
+ }
+
+ // Consumes from the input an identifier with no preceding disambiguator,
+ // returning true on success.
+ //
+ // When ParseIdentifier calls this, it passes the N<namespace> character and
+ // disambiguator value so that "{closure#42}" and similar forms can be
+ // rendered correctly.
+ //
+ // At other appearances of undisambiguated-identifier in the grammar, this
+ // treatment is not applicable, and the call site omits both arguments.
+ //
+ // Returns false if the length prefix is missing or unparsable, if Punycode
+ // decoding fails, if a raw identifier byte is neither an identifier
+ // character nor a byte with the high bit set, or if any Emit call fails.
+ ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier(
+ char uppercase_namespace = '\0', int disambiguator = 0) {
+ // undisambiguated-identifier -> u? decimal-number _? bytes
+ const bool is_punycoded = Eat('u');
+ // ParseDecimalNumber makes the same digit check; this is an explicit guard.
+ if (!IsDigit(Peek())) return false;
+ int num_bytes = 0;
+ if (!ParseDecimalNumber(num_bytes)) return false;
+ (void)Eat('_'); // optional separator, needed if a digit follows
+ if (is_punycoded) {
+ // Hand the next num_bytes input bytes and the remaining output space
+ // [out_, out_end_) to DecodeRustPunycode. Its return value becomes the new
+ // output position; nullptr means failure.
+ //
+ // NOTE(review): nothing here checks that num_bytes bytes remain before the
+ // terminating NUL; presumably DecodeRustPunycode rejects a range that
+ // contains NUL -- confirm.
+ //
+ // NOTE(review): this block does not consult silence_depth_ and runs before
+ // the "{closure" style prefix below is emitted, so a Punycoded name looks
+ // like it would be written even while output is silenced, and ahead of its
+ // brace decoration -- confirm against Emit and DecodeRustPunycode.
+ DecodeRustPunycodeOptions options;
+ options.punycode_begin = &encoding_[pos_];
+ options.punycode_end = &encoding_[pos_] + num_bytes;
+ options.out_begin = out_;
+ options.out_end = out_end_;
+ out_ = DecodeRustPunycode(options);
+ if (out_ == nullptr) return false;
+ pos_ += static_cast<size_t>(num_bytes);
+ }
+
+ // Emit the beginnings of braced forms like {shim:vtable#0}.
+ if (uppercase_namespace != '\0') {
+ switch (uppercase_namespace) {
+ case 'C':
+ if (!Emit("{closure")) return false;
+ break;
+ case 'S':
+ if (!Emit("{shim")) return false;
+ break;
+ default:
+ // Any other namespace character is printed as is, e.g., "{X".
+ if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
+ break;
+ }
+ // The ':' separator appears only when a name follows, so an unnamed
+ // closure prints as {closure#0} rather than {closure:#0}.
+ if (num_bytes > 0 && !Emit(":")) return false;
+ }
+
+ // Emit the name itself.
+ if (!is_punycoded) {
+ // Copy exactly num_bytes bytes from input to output, validating each one.
+ for (int i = 0; i < num_bytes; ++i) {
+ const char c = Take();
+ if (!IsIdentifierChar(c) &&
+ // The spec gives toolchains the choice of Punycode or raw UTF-8 for
+ // identifiers containing code points above 0x7f, so accept bytes
+ // with the high bit set.
+ (c & 0x80) == 0) {
+ return false;
+ }
+ if (!EmitChar(c)) return false;
+ }
+ }
+
+ // Emit the endings of braced forms, e.g., "#42}".
+ if (uppercase_namespace != '\0') {
+ if (!EmitChar('#')) return false;
+ if (!EmitDisambiguator(disambiguator)) return false;
+ if (!EmitChar('}')) return false;
+ }
+
+ return true;
+ }
+
+ // Parses a decimal-number such as 0 or 123.
+ //
+ // Returns true and stores the number in value on success.  Returns false and
+ // stores -1 in value if the input does not start with a digit or if the
+ // number is too large to accumulate safely in an int.
+ ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) {
+   value = -1;
+   if (!IsDigit(Peek())) return false;
+
+   int number = Take() - '0';
+   // Decimal numbers are never encoded with extra leading zeroes, so a
+   // leading 0 is the whole number and any digits after it stay unconsumed.
+   if (number != 0) {
+     constexpr int kLargestSafePrefix = std::numeric_limits<int>::max() / 10;
+     while (IsDigit(Peek())) {
+       // Appending one more digit might overflow, so give up instead.
+       if (number >= kLargestSafePrefix) return false;  // too big
+       number = 10 * number + (Take() - '0');
+     }
+   }
+   value = number;
+   return true;
+ }
+
+ // Parses a binder of higher-ranked lifetimes if the next character is 'G'.
+ //
+ // Returns true if no binder is present or if the binder parses; the encoded
+ // lifetime count is thrown away.  Returns false if a 'G' is not followed by
+ // a valid base-62-number.
+ ABSL_MUST_USE_RESULT bool ParseOptionalBinder() {
+   // binder -> G base-62-number
+   int unused_lifetime_count = 0;
+   return !Eat('G') || ParseBase62Number(unused_lifetime_count);
+ }
+
+ // Parses a lifetime if the next character is 'L'.
+ //
+ // Returns true if no lifetime is present or if the lifetime parses.  The
+ // lifetime index is neither printed nor range-checked: lifetimes are a finer
+ // detail than other things left out of the output, such as the entire
+ // contents of generic-args.
+ //
+ // Returns false if an 'L' is not followed by a valid base-62-number.
+ ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() {
+   // lifetime -> L base-62-number
+   int unused_de_bruijn_index = 0;
+   return !Eat('L') || ParseBase62Number(unused_de_bruijn_index);
+ }
+
+ // Same as ParseOptionalLifetime, except that the absence of a lifetime at
+ // the current input position is a failure.
+ ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() {
+   return Peek() == 'L' && ParseOptionalLifetime();
+ }
+
+ // Appends ns to the namespace stack.  Returns true if there was room for
+ // it, or false, leaving the stack unchanged, if the stack was already full.
+ ABSL_MUST_USE_RESULT bool PushNamespace(char ns) {
+   const bool has_room = namespace_depth_ != kNamespaceStackSize;
+   if (has_room) {
+     namespace_stack_[namespace_depth_] = ns;
+     ++namespace_depth_;
+   }
+   return has_room;
+ }
+
+ // Removes and returns the most recently pushed namespace character.  The
+ // caller must ensure the namespace stack is not empty
+ // (namespace_depth_ > 0).
+ char PopNamespace() {
+   --namespace_depth_;
+   return namespace_stack_[namespace_depth_];
+ }
+
+ // Appends position to the position stack.  Returns true if there was room
+ // for it, or false, leaving the stack unchanged, if the stack was already
+ // full.
+ ABSL_MUST_USE_RESULT bool PushPosition(int position) {
+   const bool has_room = position_depth_ != kPositionStackSize;
+   if (has_room) {
+     position_stack_[position_depth_] = position;
+     ++position_depth_;
+   }
+   return has_room;
+ }
+
+ // Removes and returns the most recently pushed input position.  The caller
+ // must ensure the position stack is not empty (position_depth_ > 0).
+ int PopPosition() {
+   --position_depth_;
+   return position_stack_[position_depth_];
+ }
+
+ // Parses the base-62-number that names a backref target, saves the current
+ // input position on the position stack, and moves the input position to the
+ // start of the target.  Returns true on success.  Returns false if the
+ // number does not parse or is too big, if the target does not lie before
+ // this backref, or if the position stack is full.
+ ABSL_MUST_USE_RESULT bool BeginBackref() {
+   // backref = B base-62-number (B already consumed)
+   //
+   // Encoded offsets count from just after the _R prefix, so work out where
+   // this backref's own B sits in those terms before consuming the number.
+   const int offset_of_this_backref =
+       pos_ - 2 /* _R */ - 1 /* B already consumed */;
+
+   int offset = 0;
+   if (!ParseBase62Number(offset)) return false;
+
+   // A negative offset means the number overflowed int.  An offset at or past
+   // this backref would not point backward.
+   if (offset < 0 || offset >= offset_of_this_backref) return false;
+
+   // Convert to an index into encoding_, which includes the _R prefix.
+   const int target_position = offset + 2;
+
+   // Remember where to resume once the target has been parsed.
+   if (!PushPosition(pos_)) return false;
+
+   // Jump to the target.
+   //
+   // We make no attempt to verify that the target begins a construct of the
+   // kind expected where the backref appeared; we simply parse from there and
+   // see whether that succeeds.  Some wrong manglings are therefore accepted,
+   // e.g., a type backref aimed at an 'l' inside an identifier, which happens
+   // to mean i32 when read as a type.  In exchange we avoid the complexity
+   // and RAM footprint of recording which offsets began which kinds of
+   // substructures.  Existing demanglers take similar shortcuts.
+   pos_ = target_position;
+   return true;
+ }
+
+ // Finishes a backref production: resumes parsing at the input position that
+ // the matching BeginBackref saved on the position stack.
+ void EndBackref() {
+   const int resume_position = PopPosition();
+   pos_ = resume_position;
+ }
+
+ // The leftmost recursion_depth_ elements of recursion_stack_ contain the
+ // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed.
+ ReturnAddress recursion_stack_[kStackSize] = {};
+ int recursion_depth_ = 0;
+
+ // The leftmost namespace_depth_ elements of namespace_stack_ contain the
+ // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a
+ // closure.
+ //
+ // Maintained by PushNamespace and PopNamespace.
+ char namespace_stack_[kNamespaceStackSize] = {};
+ int namespace_depth_ = 0;
+
+ // The leftmost position_depth_ elements of position_stack_ contain the input
+ // positions to return to after fully printing the targets of backrefs.
+ //
+ // BeginBackref pushes onto this stack and EndBackref pops from it.
+ int position_stack_[kPositionStackSize] = {};
+ int position_depth_ = 0;
+
+ // Anything parsed while silence_depth_ > 0 contributes nothing to the
+ // demangled output. For constructs omitted from the demangling, such as
+ // impl-path and the contents of generic-args, we will increment
+ // silence_depth_ on the way in and decrement silence_depth_ on the way out.
+ int silence_depth_ = 0;
+
+ // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
+ // the next input character to be scanned.
+ //
+ // pos_ indexes the whole string, including the leading _R; BeginBackref
+ // adjusts encoded backref offsets by 2 for that reason.
+ int pos_ = 0;
+ const char* encoding_ = nullptr;
+
+ // Output: *out_ is where the next output character should be written, and
+ // out_end_ points past the last byte of available space.
+ char* out_ = nullptr;
+ char* out_end_ = nullptr;
+};
+
+} // namespace
+
+// Runs a RustSymbolParser over mangled, giving it the output range
+// [out, out + out_size), and returns what its Parse() returns.
+bool DemangleRustSymbolEncoding(const char* mangled, char* out,
+                                size_t out_size) {
+  char* const out_end = out + out_size;
+  return RustSymbolParser(mangled, out, out_end).Parse();
+}
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/demangle_rust.h b/absl/debugging/internal/demangle_rust.h
new file mode 100644
index 00000000..94a9aecb
--- /dev/null
+++ b/absl/debugging/internal/demangle_rust.h
@@ -0,0 +1,42 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
+#define ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
+
+#include <cstddef>
+
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+// Demangle the Rust encoding `mangled`. On success, return true and write the
+// demangled symbol name to `out`. Otherwise, return false, leaving unspecified
+// contents in `out`. For example, calling DemangleRustSymbolEncoding with
+// `mangled = "_RNvC8my_crate7my_func"` will yield `my_crate::my_func` in `out`,
+// provided `out_size` is large enough for that value and its trailing NUL.
+//
+// `mangled` is a NUL-terminated string. `out` is the start of a buffer of
+// `out_size` bytes; if the result and its trailing NUL do not fit, the call
+// returns false.
+//
+// DemangleRustSymbolEncoding is async-signal-safe and runs in bounded C++
+// call-stack space. It is suitable for symbolizing stack traces in a signal
+// handler.
+bool DemangleRustSymbolEncoding(const char* mangled, char* out,
+ size_t out_size);
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
diff --git a/absl/debugging/internal/demangle_rust_test.cc b/absl/debugging/internal/demangle_rust_test.cc
new file mode 100644
index 00000000..8ceb1fd7
--- /dev/null
+++ b/absl/debugging/internal/demangle_rust_test.cc
@@ -0,0 +1,584 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/demangle_rust.h"
+
+#include <cstddef>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+namespace {
+
+// Calls DemangleRustSymbolEncoding(mangled, <buffer>, buffer_size) on a
+// buffer that has one extra canary byte after its buffer_size usable bytes.
+//
+// Returns "Failed parse" if demangling reports failure, a "Buffer overrun"
+// message if the canary byte was modified, and otherwise the NUL-terminated
+// string that demangling left at the start of the buffer.
+std::string ResultOfDemangling(const char* mangled, size_t buffer_size) {
+  constexpr char kCanaryCharacter = 0x7f;  // arbitrary unlikely value
+
+  // Pad with something other than NUL so that a missing trailing NUL in the
+  // demangler's output shows up as trailing '~' characters in the result.
+  std::string buffer(buffer_size, '~');
+  buffer.push_back(kCanaryCharacter);
+
+  const bool demangled =
+      DemangleRustSymbolEncoding(mangled, &buffer[0], buffer_size);
+  if (!demangled) return "Failed parse";
+
+  const bool canary_intact = buffer[buffer_size] == kCanaryCharacter;
+  if (!canary_intact) {
+    return "Buffer overrun by output: " + buffer.substr(0, buffer_size + 1)
+        + "...";
+  }
+
+  // Build the result from the C string, not from buffer itself, so that the
+  // padding after the first NUL is dropped.
+  return std::string(buffer.c_str());
+}
+
+// Tests that DemangleRustSymbolEncoding converts mangled into plaintext given
+// enough output buffer space but returns false and avoids overrunning a buffer
+// that is one byte too short.
+//
+// plaintext must be a string literal: buffer sizes are derived from
+// sizeof(plaintext), which counts the literal's trailing NUL, so
+// just_enough_space is exactly the text plus its NUL. Both arguments are used
+// inside a captureless lambda, so they cannot name local variables.
+//
+// The lambda wrapping allows ASSERT_EQ to branch out the first time an
+// expectation is not satisfied, preventing redundant errors for the same bug.
+//
+// We test first with excess space so that if the algorithm just computes the
+// wrong answer, it will be clear from the error log that the bounds checks are
+// unlikely to be the code at fault.
+#define EXPECT_DEMANGLING(mangled, plaintext) \
+ do { \
+ [] { \
+ constexpr size_t plenty_of_space = sizeof(plaintext) + 128; \
+ constexpr size_t just_enough_space = sizeof(plaintext); \
+ constexpr size_t one_byte_too_few = sizeof(plaintext) - 1; \
+ const char* expected_plaintext = plaintext; \
+ const char* expected_error = "Failed parse"; \
+ ASSERT_EQ(ResultOfDemangling(mangled, plenty_of_space), \
+ expected_plaintext); \
+ ASSERT_EQ(ResultOfDemangling(mangled, just_enough_space), \
+ expected_plaintext); \
+ ASSERT_EQ(ResultOfDemangling(mangled, one_byte_too_few), \
+ expected_error); \
+ }(); \
+ } while (0)
+
+// Tests that DemangleRustSymbolEncoding rejects the given input (typically, a
+// truncation of a real Rust symbol name).
+//
+// The 1024-byte buffer is meant to rule out lack of output space as the reason
+// for the failure. Uses EXPECT_EQ, so a failure does not end the test.
+#define EXPECT_DEMANGLING_FAILS(mangled) \
+ do { \
+ constexpr size_t plenty_of_space = 1024; \
+ const char* expected_error = "Failed parse"; \
+ EXPECT_EQ(ResultOfDemangling(mangled, plenty_of_space), expected_error); \
+ } while (0)
+
+// Piping grep -C 1 _R demangle_rust_test.cc into your favorite c++filt
+// implementation allows you to verify that the goldens below are reasonable.
+
+// "_RC0" names a crate whose identifier has length zero. Demangling it is
+// expected to succeed even when given a null buffer of size zero.
+TEST(DemangleRust, EmptyDemangling) {
+ EXPECT_TRUE(DemangleRustSymbolEncoding("_RC0", nullptr, 0));
+}
+
+// The same function without and with a crate disambiguator (s09azAZ_). The
+// disambiguator does not appear in the expected output.
+TEST(DemangleRust, FunctionAtCrateLevel) {
+ EXPECT_DEMANGLING("_RNvC10crate_name9func_name", "crate_name::func_name");
+ EXPECT_DEMANGLING(
+ "_RNvCs09azAZ_10crate_name9func_name", "crate_name::func_name");
+}
+
+// Each input is a proper prefix of one of the FunctionAtCrateLevel manglings,
+// cut off partway through, and must be rejected.
+TEST(DemangleRust, TruncationsOfFunctionAtCrateLevel) {
+ EXPECT_DEMANGLING_FAILS("_R");
+ EXPECT_DEMANGLING_FAILS("_RN");
+ EXPECT_DEMANGLING_FAILS("_RNvC");
+ EXPECT_DEMANGLING_FAILS("_RNvC10");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_nam");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_name");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9func_nam");
+ EXPECT_DEMANGLING_FAILS("_RNvCs");
+ EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ");
+ EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ_");
+}
+
+// Trailing text introduced by '.' or '$' is accepted and left out of the
+// expected output.
+TEST(DemangleRust, VendorSuffixes) {
+ EXPECT_DEMANGLING("_RNvC10crate_name9func_name.!@#", "crate_name::func_name");
+ EXPECT_DEMANGLING("_RNvC10crate_name9func_name$!@#", "crate_name::func_name");
+}
+
+// The same non-ASCII identifier, first as 17 bytes of raw UTF-8 and then as
+// a 19-byte Punycode form marked by the 'u' prefix. Both are expected to
+// produce the same text.
+TEST(DemangleRust, UnicodeIdentifiers) {
+ EXPECT_DEMANGLING("_RNvC7ice_cap17Eyjafjallajökull",
+ "ice_cap::Eyjafjallajökull");
+ EXPECT_DEMANGLING("_RNvC7ice_caps_u19Eyjafjallajkull_jtb",
+ "ice_cap::Eyjafjallajökull");
+}
+
+// A value-namespace item (Nv) nested in a type-namespace item (Nt) nested in
+// a crate; each level is joined with "::".
+TEST(DemangleRust, FunctionInModule) {
+ EXPECT_DEMANGLING("_RNvNtCs09azAZ_10crate_name11module_name9func_name",
+ "crate_name::module_name::func_name");
+}
+
+// Two nested value-namespace items (Nv inside Nv).
+TEST(DemangleRust, FunctionInFunction) {
+ EXPECT_DEMANGLING(
+ "_RNvNvCs09azAZ_10crate_name15outer_func_name15inner_func_name",
+ "crate_name::outer_func_name::inner_func_name");
+}
+
+// A closure (NC) with an empty name and no disambiguator prints as
+// {closure#0}. The second case appends an instantiating crate
+// (Cs123_12client_crate), which does not appear in the expected output.
+TEST(DemangleRust, ClosureInFunction) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_name0",
+ "crate_name::func_name::{closure#0}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_name0Cs123_12client_crate",
+ "crate_name::func_name::{closure#0}");
+}
+
+// The number after '#' is the closure's disambiguator. s_ encodes 1, and
+// s<digits>_ encodes the base-62 value of the digits plus 2, where 0-9, a-z
+// and A-Z stand for digit values 0-9, 10-35 and 36-61 respectively.
+TEST(DemangleRust, ClosureNumbering) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#1}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names0_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#2}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names9_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#11}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesa_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#12}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesz_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#37}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesA_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#38}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesZ_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#63}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names10_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#64}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesg6_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#1000}");
+}
+
+// A disambiguator too large for the parser to compute (s1234567_) is expected
+// to print as '?' rather than make demangling fail.
+TEST(DemangleRust, ClosureNumberOverflowingInt) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names1234567_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#?}");
+}
+
+// A closure that carries a nonempty name prints that name after a ':' inside
+// the braces, ahead of the disambiguator.
+TEST(DemangleRust, UnexpectedlyNamedClosure) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs123_10crate_name9func_name12closure_nameCs456_12client_crate",
+ "crate_name::func_name::{closure:closure_name#0}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs123_10crate_name9func_names2_12closure_nameCs456_12client_crate",
+ "crate_name::func_name::{closure:closure_name#4}");
+}
+
+// A value-namespace item whose parent path is a closure.
+TEST(DemangleRust, ItemNestedInsideClosure) {
+ EXPECT_DEMANGLING(
+ "_RNvNCNvCs123_10crate_name9func_name015inner_func_nameCs_12client_crate",
+ "crate_name::func_name::{closure#0}::inner_func_name");
+}
+
+// The S namespace prints as {shim:<name>#<disambiguator>}.
+TEST(DemangleRust, Shim) {
+ EXPECT_DEMANGLING(
+ "_RNSNvCs123_10crate_name9func_name6vtableCs456_12client_crate",
+ "crate_name::func_name::{shim:vtable#0}");
+}
+
+// An uppercase namespace other than C or S prints its own letter in the
+// braces: here X gives {X:<name>#<disambiguator>}.
+TEST(DemangleRust, UnknownUppercaseNamespace) {
+ EXPECT_DEMANGLING(
+ "_RNXNvCs123_10crate_name9func_name14mystery_objectCs456_12client_crate",
+ "crate_name::func_name::{X:mystery_object#0}");
+}
+
+// Three uppercase namespaces nested inside one another (C in X in Y). Each
+// braced form must pick up its own namespace letter and disambiguator.
+TEST(DemangleRust, NestedUppercaseNamespaces) {
+ EXPECT_DEMANGLING(
+ "_RNCNXNYCs123_10crate_names0_1ys1_1xs2_0Cs456_12client_crate",
+ "crate_name::{Y:y#2}::{X:x#3}::{closure#4}");
+}
+
+// Y followed by a type and a trait path prints as <type as trait>.
+TEST(DemangleRust, TraitDefinition) {
+ EXPECT_DEMANGLING(
+ "_RNvYNtC7crate_a9my_structNtC7crate_b8my_trait1f",
+ "<crate_a::my_struct as crate_b::my_trait>::f");
+}
+
+// Each case varies only the single lowercase letter after Y, which selects a
+// basic type. The final case uses 'k', which is expected to be rejected.
+TEST(DemangleRust, BasicTypeNames) {
+ EXPECT_DEMANGLING("_RNvYaNtC1c1t1f", "<i8 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYbNtC1c1t1f", "<bool as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYcNtC1c1t1f", "<char as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYdNtC1c1t1f", "<f64 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYeNtC1c1t1f", "<str as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYfNtC1c1t1f", "<f32 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYhNtC1c1t1f", "<u8 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYiNtC1c1t1f", "<isize as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYjNtC1c1t1f", "<usize as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYlNtC1c1t1f", "<i32 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYmNtC1c1t1f", "<u32 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYnNtC1c1t1f", "<i128 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYoNtC1c1t1f", "<u128 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYpNtC1c1t1f", "<_ as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYsNtC1c1t1f", "<i16 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYtNtC1c1t1f", "<u16 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYuNtC1c1t1f", "<() as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYvNtC1c1t1f", "<... as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYxNtC1c1t1f", "<i64 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYyNtC1c1t1f", "<u64 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYzNtC1c1t1f", "<! as c::t>::f");
+
+ EXPECT_DEMANGLING_FAILS("_RNvYkNtC1c1t1f");
+}
+
+// S followed by a type prints as [type], for a basic and a path element type.
+TEST(DemangleRust, SliceTypes) {
+ EXPECT_DEMANGLING("_RNvYSlNtC1c1t1f", "<[i32] as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYSNtC1d1sNtC1c1t1f", "<[d::s] as c::t>::f");
+}
+
+// R followed by a type prints as &type.
+TEST(DemangleRust, ImmutableReferenceTypes) {
+ EXPECT_DEMANGLING("_RNvYRlNtC1c1t1f", "<&i32 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYRNtC1d1sNtC1c1t1f", "<&d::s as c::t>::f");
+}
+
+// Q followed by a type prints as &mut type.
+TEST(DemangleRust, MutableReferenceTypes) {
+ EXPECT_DEMANGLING("_RNvYQlNtC1c1t1f", "<&mut i32 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYQNtC1d1sNtC1c1t1f", "<&mut d::s as c::t>::f");
+}
+
+// P followed by a type prints as *const type.
+TEST(DemangleRust, ConstantRawPointerTypes) {
+ EXPECT_DEMANGLING("_RNvYPlNtC1c1t1f", "<*const i32 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYPNtC1d1sNtC1c1t1f", "<*const d::s as c::t>::f");
+}
+
+// O followed by a type prints as *mut type.
+TEST(DemangleRust, MutableRawPointerTypes) {
+ EXPECT_DEMANGLING("_RNvYOlNtC1c1t1f", "<*mut i32 as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYONtC1d1sNtC1c1t1f", "<*mut d::s as c::t>::f");
+}
+
+// T immediately followed by E is the empty tuple, printed as ().
+TEST(DemangleRust, TupleLength0) {
+ EXPECT_DEMANGLING("_RNvYTENtC1c1t1f", "<() as c::t>::f");
+}
+
+// A one-element tuple keeps a trailing comma: (type,).
+TEST(DemangleRust, TupleLength1) {
+ EXPECT_DEMANGLING("_RNvYTlENtC1c1t1f", "<(i32,) as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYTNtC1d1sENtC1c1t1f", "<(d::s,) as c::t>::f");
+}
+
+// Two-element tuples separate the element types with ", ".
+TEST(DemangleRust, TupleLength2) {
+ EXPECT_DEMANGLING("_RNvYTlmENtC1c1t1f", "<(i32, u32) as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYTNtC1d1xNtC1e1yENtC1c1t1f",
+ "<(d::x, e::y) as c::t>::f");
+}
+
+// Three-element tuples are the longest that are printed in full.
+TEST(DemangleRust, TupleLength3) {
+ EXPECT_DEMANGLING("_RNvYTlmnENtC1c1t1f", "<(i32, u32, i128) as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYTNtC1d1xNtC1e1yNtC1f1zENtC1c1t1f",
+ "<(d::x, e::y, f::z) as c::t>::f");
+}
+
+// With more than three elements, only the first three types are printed and
+// everything after them collapses into "...".
+TEST(DemangleRust, LongerTuplesAbbreviated) {
+ EXPECT_DEMANGLING("_RNvYTlmnoENtC1c1t1f",
+ "<(i32, u32, i128, ...) as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYTlmnNtC1d1xNtC1e1yENtC1c1t1f",
+ "<(i32, u32, i128, ...) as c::t>::f");
+}
+
+// B4_ decodes to offset 5 after the _R prefix, which is the start of
+// C8my_crate, so the trait's parent path prints as my_crate.
+TEST(DemangleRust, PathBackrefToCrate) {
+ EXPECT_DEMANGLING("_RNvYNtC8my_crate9my_structNtB4_8my_trait1f",
+ "<my_crate::my_struct as my_crate::my_trait>::f");
+}
+
+// Here B4_ (offset 5 after _R) lands on NtC1c1m, a nested path, so the
+// backref expands to c::m.
+TEST(DemangleRust, PathBackrefToNestedPath) {
+ EXPECT_DEMANGLING("_RNvYNtNtC1c1m1sNtB4_1t1f", "<c::m::s as c::m::t>::f");
+}
+
+// The trailing instantiating crate is spelled as a backref (B3_) to
+// C8my_crate. It is accepted and, like any instantiating crate, not printed.
+TEST(DemangleRust, PathBackrefAsInstantiatingCrate) {
+ EXPECT_DEMANGLING("_RNCNvC8my_crate7my_func0B3_",
+ "my_crate::my_func::{closure#0}");
+}
+
+// B4_ refers back to Rl (&i32). B3_ refers back to the inner tuple, which
+// itself contains B4_, so expanding B3_ follows a backref within a backref.
+TEST(DemangleRust, TypeBackrefsNestedInTuple) {
+ EXPECT_DEMANGLING("_RNvYTTRlB4_ERB3_ENtC1c1t1f",
+ "<((&i32, &i32), &(&i32, &i32)) as c::t>::f");
+}
+
+// B_ decodes to offset 0, the start of the whole mangling. In the first case
+// that is the backref's own position, which is not a backward reference. In
+// the second it encloses the backref, so following it would repeat forever
+// unless the parser gives up; the test only requires that parsing fails.
+TEST(DemangleRust, NoInfiniteLoopOnBackrefToTheWhole) {
+ EXPECT_DEMANGLING_FAILS("_RB_");
+ EXPECT_DEMANGLING_FAILS("_RNvB_1sNtC1c1t1f");
+}
+
+// Each backref sits at offset 0 after _R but names a target offset of 1
+// through 5, i.e., beyond itself. All must be rejected.
+TEST(DemangleRust, NoCrashOnForwardBackref) {
+ EXPECT_DEMANGLING_FAILS("_RB0_");
+ EXPECT_DEMANGLING_FAILS("_RB1_");
+ EXPECT_DEMANGLING_FAILS("_RB2_");
+ EXPECT_DEMANGLING_FAILS("_RB3_");
+ EXPECT_DEMANGLING_FAILS("_RB4_");
+}
+
+// The backref sits in the fourth tuple element, which is abbreviated to "..."
+// and so produces no output of its own.
+TEST(DemangleRust, PathBackrefsDoNotRecurseDuringSilence) {
+ // B_ points at the value f (the whole mangling), so the cycle would lead to
+ // parse failure if the parser tried to parse what was pointed to.
+ EXPECT_DEMANGLING("_RNvYTlmnNtB_1sENtC1c1t1f",
+ "<(i32, u32, i128, ...) as c::t>::f");
+}
+
+// As above, but the fourth tuple element is itself a type backref.
+TEST(DemangleRust, TypeBackrefsDoNotRecurseDuringSilence) {
+ // B2_ points at the tuple type, likewise making a cycle that the parser
+ // avoids following.
+ EXPECT_DEMANGLING("_RNvYTlmnB2_ENtC1c1t1f",
+ "<(i32, u32, i128, ...) as c::t>::f");
+}
+
+// The backref stands where an array length is expected, inside generic-args,
+// whose contents are left out of the output.
+TEST(DemangleRust, ConstBackrefsDoNotRecurseDuringSilence) {
+ // B_ points at the whole I...E mangling, which does not parse as a const.
+ EXPECT_DEMANGLING("_RINvC1c1fAlB_E", "c::f::<>");
+}
+
+// The 238-byte identifier pushes the backref (B4_) past input position 255,
+// so the position saved for resuming after the backref exceeds one byte.
+TEST(DemangleRust, ReturnFromBackrefToInputPosition256) {
+ // Show that we can resume at input positions that don't fit into a byte.
+ EXPECT_DEMANGLING("_RNvYNtC1c238very_long_type_"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABC"
+ "NtB4_1t1f",
+ "<c::very_long_type_"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABC"
+ " as c::t>::f");
+}
+
+// I <path> E with no arguments in between prints as path::<>.
+TEST(DemangleRust, EmptyGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fE", "c::f::<>");
+}
+
+// Generic arguments are parsed but omitted from the output. The trailing
+// comment shows what the arguments stand for.
+TEST(DemangleRust, OneSimpleTypeInGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1flE", // c::f::<i32>
+ "c::f::<>");
+}
+
+// A tuple argument has its own closing E before the E that ends generic-args;
+// both must be consumed while nothing is printed between the angle brackets.
+TEST(DemangleRust, OneTupleInGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fTlmEE", // c::f::<(i32, u32)>
+ "c::f::<>");
+}
+
+// A path used as a generic argument is likewise omitted from the output.
+TEST(DemangleRust, OnePathInGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fNtC1d1sE", // c::f::<d::s>
+ "c::f::<>");
+}
+
+// Several generic arguments of different kinds in a row, all omitted.
+TEST(DemangleRust, LongerGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1flmRNtC1d1sE", // c::f::<i32, u32, &d::s>
+ "c::f::<>");
+}
+
+// Backrefs may appear among generic arguments, both as a type (B7_) and as
+// the parent of a path (B2_); the arguments are still omitted.
+TEST(DemangleRust, BackrefInGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fRlB7_NtB2_1sE", // c::f::<&i32, &i32, c::s>
+ "c::f::<>");
+}
+
+// A generic argument that has generic arguments of its own. Only the
+// outermost angle brackets appear in the output.
+TEST(DemangleRust, NestedGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fINtB2_1slEmE", // c::f::<c::s::<i32>, u32>
+ "c::f::<>");
+}
+
+// Output resumes after the omitted generic-args: the nested item g is
+// printed following "<>".
+TEST(DemangleRust, MonomorphicEntityNestedInsideGeneric) {
+ EXPECT_DEMANGLING("_RNvINvC1c1fppE1g", // c::f::<_, _>::g
+ "c::f::<>::g");
+}
+
+// A <type> <const> prints as [type; length], with the length's hex digits
+// (1f in j1f_) shown after a 0x prefix.
+TEST(DemangleRust, ArrayTypeWithSimpleElementType) {
+ EXPECT_DEMANGLING("_RNvYAlj1f_NtC1c1t1f", "<[i32; 0x1f] as c::t>::f");
+}
+
+// The element type is a generic path, so its arguments print as "<>" inside
+// the array brackets.
+TEST(DemangleRust, ArrayTypeWithComplexElementType) {
+ EXPECT_DEMANGLING("_RNvYAINtC1c1slEj1f_NtB6_1t1f",
+ "<[c::s::<>; 0x1f] as c::t>::f");
+}
+
+// An array whose element type is another array; the inner length (0x1f)
+// precedes the outer one (0x2e) in both the mangling and the output.
+TEST(DemangleRust, NestedArrayType) {
+ EXPECT_DEMANGLING("_RNvYAAlj1f_j2e_NtC1c1t1f",
+ "<[[i32; 0x1f]; 0x2e] as c::t>::f");
+}
+
+// The outer array's length is a backref (B5_) to the inner array's length
+// j1f_, so both lengths print as 0x1f.
+TEST(DemangleRust, BackrefArraySize) {
+ EXPECT_DEMANGLING("_RNvYAAlj1f_B5_NtC1c1t1f",
+ "<[[i32; 0x1f]; 0x1f] as c::t>::f");
+}
+
+// A length of zero (j0_) prints as 0x0.
+TEST(DemangleRust, ZeroArraySize) {
+ EXPECT_DEMANGLING("_RNvYAlj0_NtC1c1t1f", "<[i32; 0x0] as c::t>::f");
+}
+
+// An 'n' before the hex digits prints as a leading '-', even on a usize (j)
+// constant and even on zero.
+TEST(DemangleRust, SurprisingMinusesInArraySize) {
+ // Compilers shouldn't do this stuff, but existing demanglers accept it.
+ EXPECT_DEMANGLING("_RNvYAljn0_NtC1c1t1f", "<[i32; -0x0] as c::t>::f");
+ EXPECT_DEMANGLING("_RNvYAljn42_NtC1c1t1f", "<[i32; -0x42] as c::t>::f");
+}
+
+// K introduces a const generic argument; like type arguments it is parsed
+// and omitted.
+TEST(DemangleRust, NumberAsGenericArg) {
+ EXPECT_DEMANGLING("_RINvC1c1fKl8_E", // c::f::<0x8>
+ "c::f::<>");
+}
+
+// A const argument followed by a type argument.
+TEST(DemangleRust, NumberAsFirstOfTwoGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fKl8_mE", // c::f::<0x8, u32>
+ "c::f::<>");
+}
+
+// A type argument followed by a const argument.
+TEST(DemangleRust, NumberAsSecondOfTwoGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fmKl8_E", // c::f::<u32, 0x8>
+ "c::f::<>");
+}
+
+// Kp is a const argument given as a placeholder rather than a value.
+TEST(DemangleRust, NumberPlaceholder) {
+ EXPECT_DEMANGLING("_RNvINvC1c1fKpE1g", // c::f::<_>::g
+ "c::f::<>::g");
+}
+
+// M <impl-path> <type> prints as <type>. The impl-path (here
+// NtC8my_crate6my_mod) is parsed but not printed.
+TEST(DemangleRust, InherentImplWithoutDisambiguator) {
+ EXPECT_DEMANGLING("_RNvMNtC8my_crate6my_modNtB2_9my_struct7my_func",
+ "<my_crate::my_mod::my_struct>::my_func");
+}
+
+// As above, with a disambiguator (s_) on the impl-path. The expected output
+// is unchanged; the backref offset grows by the two extra characters.
+TEST(DemangleRust, InherentImplWithDisambiguator) {
+ EXPECT_DEMANGLING("_RNvMs_NtC8my_crate6my_modNtB4_9my_struct7my_func",
+ "<my_crate::my_mod::my_struct>::my_func");
+}
+
+// X <impl-path> <type> <path> prints as <type as trait>. The impl-path
+// (C8my_crate) is not printed but serves as the target of both backrefs.
+TEST(DemangleRust, TraitImplWithoutDisambiguator) {
+ EXPECT_DEMANGLING("_RNvXC8my_crateNtB2_9my_structNtB2_8my_trait7my_func",
+ "<my_crate::my_struct as my_crate::my_trait>::my_func");
+}
+
+// As above, with a disambiguator (s_) on the impl-path; the expected output
+// is unchanged.
+TEST(DemangleRust, TraitImplWithDisambiguator) {
+ EXPECT_DEMANGLING("_RNvXs_C8my_crateNtB4_9my_structNtB4_8my_trait7my_func",
+ "<my_crate::my_struct as my_crate::my_trait>::my_func");
+}
+
+// The self type of a trait impl need not be a path; here it is &i32 (Rl).
+TEST(DemangleRust, TraitImplWithNonpathSelfType) {
+ EXPECT_DEMANGLING("_RNvXC8my_crateRlNtB2_8my_trait7my_func",
+ "<&i32 as my_crate::my_trait>::my_func");
+}
+
+// Function-pointer types (F ... E <return type>) are abbreviated to "fn...".
+// The trailing comment shows the full signature.
+TEST(DemangleRust, ThunkType) {
+ EXPECT_DEMANGLING("_RNvYFEuNtC1c1t1f", // <fn() as c::t>::f
+ "<fn... as c::t>::f");
+}
+
+// The return type after the parameter list's E may be a compound type. It is
+// consumed and the whole signature still prints as "fn...".
+TEST(DemangleRust, NontrivialFunctionReturnType) {
+ EXPECT_DEMANGLING(
+ "_RNvYFERTlmENtC1c1t1f", // <fn() -> &(i32, u32) as c::t>::f
+ "<fn... as c::t>::f");
+}
+
+// A function-pointer type with a single parameter type.
+TEST(DemangleRust, OneParameterType) {
+ EXPECT_DEMANGLING("_RNvYFlEuNtC1c1t1f", // <fn(i32) as c::t>::f
+ "<fn... as c::t>::f");
+}
+
+// A function-pointer type with two parameter types.
+TEST(DemangleRust, TwoParameterTypes) {
+ EXPECT_DEMANGLING("_RNvYFlmEuNtC1c1t1f", // <fn(i32, u32) as c::t>::f
+ "<fn... as c::t>::f");
+}
+
+// KC in a function signature marks the "C" ABI.
+TEST(DemangleRust, ExternC) {
+ EXPECT_DEMANGLING("_RNvYFKCEuNtC1c1t1f", // <extern "C" fn() as c::t>::f
+ "<fn... as c::t>::f");
+}
+
+// An ABI other than "C" is spelled as K followed by a length-prefixed name
+// (5not_C).
+TEST(DemangleRust, ExternOther) {
+ EXPECT_DEMANGLING(
+ "_RNvYFK5not_CEuNtC1c1t1f", // <extern "not-C" fn() as c::t>::f
+ "<fn... as c::t>::f");
+}
+
+// U in a function signature marks an unsafe fn.
+TEST(DemangleRust, Unsafe) {
+ EXPECT_DEMANGLING("_RNvYFUEuNtC1c1t1f", // <unsafe fn() as c::t>::f
+ "<fn... as c::t>::f");
+}
+
+// G_ is a binder and L0_ a lifetime inside the signature. Both are consumed
+// without being printed.
+TEST(DemangleRust, Binder) {
+ EXPECT_DEMANGLING(
+ // <for<'a> fn(&'a i32) -> &'a i32 as c::t>::f
+ "_RNvYFG_RL0_lEB5_NtC1c1t1f",
+ "<fn... as c::t>::f");
+}
+
+// Binder (G_), unsafe (U) and ABI (KC) together, in that order, ahead of the
+// parameter types.
+TEST(DemangleRust, AllFnSigFeaturesInOrder) {
+ EXPECT_DEMANGLING(
+ // <for<'a> unsafe extern "C" fn(&'a i32) -> &'a i32 as c::t>::f
+ "_RNvYFG_UKCRL0_lEB8_NtC1c1t1f",
+ "<fn... as c::t>::f");
+}
+
+// A lifetime (L_) is accepted as a generic argument.
+TEST(DemangleRust, LifetimeInGenericArgs) {
+ EXPECT_DEMANGLING("_RINvC1c1fINtB2_1sL_EE", // c::f::<c::s::<'_>>
+ "c::f::<>");
+}
+
+// D E <lifetime> with no traits listed prints as a bare "dyn".
+TEST(DemangleRust, EmptyDynTrait) {
+ // This shouldn't happen, but the grammar allows it and existing demanglers
+ // accept it.
+ EXPECT_DEMANGLING("_RNvYDEL_NtC1c1t1f",
+ "<dyn as c::t>::f");
+}
+
+// D <trait path> E <lifetime> prints as "dyn trait"; the lifetime (L_) is not
+// printed.
+TEST(DemangleRust, SimpleDynTrait) {
+ EXPECT_DEMANGLING("_RNvYDNtC1c1tEL_NtC1d1u1f",
+ "<dyn c::t as d::u>::f");
+}
+
+// An associated-type binding (p <name> <type>) after the trait path prints as
+// an empty "<>" following the trait.
+TEST(DemangleRust, DynTraitWithOneAssociatedType) {
+ EXPECT_DEMANGLING(
+ "_RNvYDNtC1c1tp1xlEL_NtC1d1u1f", // <dyn c::t<x = i32> as d::u>::f
+ "<dyn c::t<> as d::u>::f");
+}
+
+// Two associated-type bindings still yield a single empty "<>".
+TEST(DemangleRust, DynTraitWithTwoAssociatedTypes) {
+ EXPECT_DEMANGLING(
+ // <dyn c::t<x = i32, y = u32> as d::u>::f
+ "_RNvYDNtC1c1tp1xlp1ymEL_NtC1d1u1f",
+ "<dyn c::t<> as d::u>::f");
+}
+
+// Multiple traits inside D...E are joined with " + ".
+TEST(DemangleRust, DynTraitPlusAutoTrait) {
+ EXPECT_DEMANGLING(
+ "_RNvYDNtC1c1tNtNtC3std6marker4SendEL_NtC1d1u1f",
+ "<dyn c::t + std::marker::Send as d::u>::f");
+}
+
+// Three traits joined with " + ". The third reaches std::marker through a
+// backref (Bc_) whose encoding uses a letter as a base-62 digit.
+TEST(DemangleRust, DynTraitPlusTwoAutoTraits) {
+ EXPECT_DEMANGLING(
+ "_RNvYDNtC1c1tNtNtC3std6marker4CopyNtBc_4SyncEL_NtC1d1u1f",
+ "<dyn c::t + std::marker::Copy + std::marker::Sync as d::u>::f");
+}
+
+// A binder (G_) may precede the trait inside D...E. It is not printed, and
+// the trait's generic arguments are omitted as usual.
+TEST(DemangleRust, HigherRankedDynTrait) {
+ EXPECT_DEMANGLING(
+ // <dyn for<'a> c::t::<&'a i32> as d::u>::f
+ "_RNvYDG_INtC1c1tRL0_lEEL_NtC1d1u1f",
+ "<dyn c::t::<> as d::u>::f");
+}
+
+} // namespace
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/demangle_test.cc b/absl/debugging/internal/demangle_test.cc
index a16ab75e..5579221a 100644
--- a/absl/debugging/internal/demangle_test.cc
+++ b/absl/debugging/internal/demangle_test.cc
@@ -31,6 +31,477 @@ namespace {
using ::testing::ContainsRegex;
+TEST(Demangle, FunctionTemplate) {
+  // Given
+  //
+  //   template <typename T>
+  //   int foo(T);
+  //
+  // this is the instantiation called by foo<int>(5).
+  constexpr char kMangled[] = "_Z3fooIiEiT_";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "foo<>()");
+}
+
+TEST(Demangle, FunctionTemplateWithNesting) {
+  // Given
+  //
+  //   template <typename T>
+  //   int foo(T);
+  //
+  // this is the instantiation called by foo<Wrapper<int>>({ .value = 5 }).
+  constexpr char kMangled[] = "_Z3fooI7WrapperIiEEiT_";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "foo<>()");
+}
+
+TEST(Demangle, FunctionTemplateWithNonTypeParamConstraint) {
+ char tmp[100];
+
+ // template <std::integral T>
+ // int foo(T);
+ //
+ // foo<int>(5);
+ ASSERT_TRUE(Demangle("_Z3fooITkSt8integraliEiT_", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, FunctionTemplateWithFunctionRequiresClause) {
+ char tmp[100];
+
+ // template <typename T>
+ // int foo() requires std::integral<T>;
+ //
+ // foo<int>();
+ ASSERT_TRUE(Demangle("_Z3fooIiEivQsr3stdE8integralIT_E", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, FunctionWithTemplateParamRequiresClause) {
+ char tmp[100];
+
+ // template <typename T>
+ // requires std::integral<T>
+ // int foo();
+ //
+ // foo<int>();
+ ASSERT_TRUE(Demangle("_Z3fooIiQsr3stdE8integralIT_EEiv", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, FunctionWithTemplateParamAndFunctionRequiresClauses) {
+ char tmp[100];
+
+ // template <typename T>
+ // requires std::integral<T>
+ // int foo() requires std::integral<T>;
+ //
+ // foo<int>();
+ ASSERT_TRUE(Demangle("_Z3fooIiQsr3stdE8integralIT_EEivQsr3stdE8integralIS0_E",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+// A `Q` inside the template argument list that is not followed by a parsable
+// constraint expression must make Demangle() return false for the whole name.
+TEST(Demangle, FunctionTemplateBacktracksOnMalformedRequiresClause) {
+ char tmp[100];
+
+ // template <typename T>
+ // int foo(T);
+ //
+ // foo<int>(5);
+ // Except there's an extra `Q` where the mangled requires clause would be.
+ ASSERT_FALSE(Demangle("_Z3fooIiQEiT_", tmp, sizeof(tmp)));
+}
+
+TEST(Demangle, FunctionTemplateWithAutoParam) {
+  // Given
+  //
+  //   template <auto>
+  //   void foo();
+  //
+  // this is the instantiation called by foo<1>().
+  constexpr char kMangled[] = "_Z3fooITnDaLi1EEvv";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "foo<>()");
+}
+
+TEST(Demangle, FunctionTemplateWithNonTypeParamPack) {
+ char tmp[100];
+
+ // template <int&..., typename T>
+ // void foo(T);
+ //
+ // foo(2);
+ ASSERT_TRUE(Demangle("_Z3fooITpTnRiJEiEvT0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, FunctionTemplateTemplateParamWithConstrainedArg) {
+ char tmp[100];
+
+ // template <typename T>
+ // concept True = true;
+ //
+ // template <typename T> requires True<T>
+ // struct Fooer {};
+ //
+ // template <template <typename T> typename>
+ // void foo() {}
+ //
+ // foo<Fooer>();
+ ASSERT_TRUE(Demangle("_Z3fooITtTyE5FooerEvv", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, ConstrainedAutoInFunctionTemplate) {
+  // Source:
+  //
+  //   template <typename T> concept C = true;
+  //   template <C auto N> void f() {}
+  //   template void f<0>();
+  constexpr char kMangled[] = "_Z1fITnDk1CLi0EEvv";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "f<>()");
+}
+
+TEST(Demangle, ConstrainedFriendFunctionTemplate) {
+ char tmp[100];
+
+ // Source:
+ //
+ // namespace ns {
+ // template <class T> struct Y {
+ // friend void y(Y) requires true {}
+ // };
+ // } // namespace ns
+ //
+ // y(ns::Y<int>{});
+ //
+ // LLVM demangling:
+ //
+ // ns::Y<int>::friend y(ns::Y<int>) requires true
+ ASSERT_TRUE(Demangle("_ZN2ns1YIiEF1yES1_QLb1E", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "ns::Y<>::friend y()");
+}
+
+TEST(Demangle, ConstrainedFriendOperatorTemplate) {
+  // Full form: ns::Y<int>::friend operator*(ns::Y<int>) requires true
+  constexpr char kMangled[] = "_ZN2ns1YIiEFdeES1_QLb1E";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "ns::Y<>::friend operator*()");
+}
+
+TEST(Demangle, NonTemplateBuiltinType) {
+  // Given
+  //
+  //   void foo(__my_builtin_type t);
+  //
+  // this is the function called by foo({}).
+  constexpr char kMangled[] = "_Z3foou17__my_builtin_type";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "foo()");
+}
+
+TEST(Demangle, SingleArgTemplateBuiltinType) {
+ char tmp[100];
+
+ // template <typename T>
+ // __my_builtin_type<T> foo();
+ //
+ // foo<int>();
+ ASSERT_TRUE(Demangle("_Z3fooIiEu17__my_builtin_typeIT_Ev", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, TwoArgTemplateBuiltinType) {
+ char tmp[100];
+
+ // template <typename T, typename U>
+ // __my_builtin_type<T, U> foo();
+ //
+ // foo<int, char>();
+ ASSERT_TRUE(
+ Demangle("_Z3fooIicEu17__my_builtin_typeIT_T0_Ev", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, TypeNestedUnderTemplatedBuiltinType) {
+ char tmp[100];
+
+ // Source:
+ //
+ // template <typename T>
+ // typename std::remove_reference_t<T>::type f(T t);
+ //
+ // struct C { using type = C; };
+ //
+ // f<const C&>(C{});
+ //
+ // These days std::remove_reference_t is implemented in terms of a vendor
+ // builtin __remove_reference_t. A full demangling might look like:
+ //
+ // __remove_reference_t<C const&>::type f<C const&>(C const&)
+ ASSERT_TRUE(Demangle("_Z1fIRK1CENu20__remove_reference_tIT_E4typeES3_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, TemplateTemplateParamSubstitution) {
+ char tmp[100];
+
+ // template <typename T>
+ // concept True = true;
+ //
+ // template<std::integral T, T> struct Foolable {};
+ // template<template<typename T, T> typename> void foo() {}
+ //
+ // template void foo<Foolable>();
+ ASSERT_TRUE(Demangle("_Z3fooITtTyTnTL0__E8FoolableEvv", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "foo<>()");
+}
+
+TEST(Demangle, TemplateParamSubstitutionWithGenericLambda) {
+ char tmp[100];
+
+ // template <typename>
+ // struct Fooer {
+ // template <typename>
+ // void foo(decltype([](auto x, auto y) {})) {}
+ // };
+ //
+ // Fooer<int> f;
+ // f.foo<int>({});
+ ASSERT_TRUE(
+ Demangle("_ZN5FooerIiE3fooIiEEvNS0_UlTL0__TL0_0_E_E", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "Fooer<>::foo<>()");
+}
+
+TEST(Demangle, LambdaRequiresTrue) {
+  // Full form: auto $_0::operator()<int>(int) const requires true
+  constexpr char kMangled[] = "_ZNK3$_0clIiEEDaT_QLb1E";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresSimpleExpression) {
+  // Full form: auto $_0::operator()<int>(int) const requires 2 + 2 == 4
+  constexpr char kMangled[] = "_ZNK3$_0clIiEEDaT_QeqplLi2ELi2ELi4E";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionContainingTrue) {
+  // Full form:
+  //   auto $_0::operator()<int>(int) const requires requires { true; }
+  constexpr char kMangled[] = "_ZNK3$_0clIiEEDaT_QrqXLb1EE";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionContainingConcept) {
+ char tmp[100];
+
+ // auto $_0::operator()<int>(int) const
+ // requires requires { std::same_as<decltype(fp), int>; }
+ ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXsr3stdE7same_asIDtfp_EiEE",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionContainingNoexceptExpression) {
+ char tmp[100];
+
+ // auto $_0::operator()<int>(int) const
+ // requires requires { {fp + fp} noexcept; }
+ ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXplfp_fp_NE", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionContainingReturnTypeConstraint) {
+ char tmp[100];
+
+ // auto $_0::operator()<int>(int) const
+ // requires requires { {fp + fp} -> std::same_as<decltype(fp)>; }
+ ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXplfp_fp_RNSt7same_asIDtfp_EEEE",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionWithBothNoexceptAndReturnType) {
+ char tmp[100];
+
+ // auto $_0::operator()<int>(int) const
+ // requires requires { {fp + fp} noexcept -> std::same_as<decltype(fp)>; }
+ ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXplfp_fp_NRNSt7same_asIDtfp_EEEE",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionContainingType) {
+ char tmp[100];
+
+ // auto $_0::operator()<S>(S) const
+ // requires requires { typename S::T; }
+ ASSERT_TRUE(Demangle("_ZNK3$_0clI1SEEDaT_QrqTNS2_1TEE", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionNestingAnotherRequires) {
+  // Full form:
+  //   auto $_0::operator()<int>(int) const
+  //       requires requires { requires true; }
+  constexpr char kMangled[] = "_ZNK3$_0clIiEEDaT_QrqQLb1EE";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "$_0::operator()<>()");
+}
+
+TEST(Demangle, LambdaRequiresRequiresExpressionContainingTwoRequirements) {
+ char tmp[100];
+
+ // auto $_0::operator()<int>(int) const
+ // requires requires { true; 2 + 2 == 4; }
+ //
+ // Both requirements are introduced by `X` in the mangling, i.e. they are
+ // simple requirements; a nested `requires ...;` requirement would be
+ // introduced by `Q` instead.
+ ASSERT_TRUE(Demangle("_ZNK3$_0clIiEEDaT_QrqXLb1EXeqplLi2ELi2ELi4EE",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "$_0::operator()<>()");
+}
+
+TEST(Demangle, RequiresExpressionWithItsOwnParameter) {
+  // Full form: S<requires (int) { fp + fp; }> f<int>(int)
+  constexpr char kMangled[] = "_Z1fIiE1SIXrQT__XplfL0p_fp_EEES1_";
+
+  char demangled[100];
+  const bool ok = Demangle(kMangled, demangled, sizeof(demangled));
+  ASSERT_TRUE(ok);
+  EXPECT_STREQ(demangled, "f<>()");
+}
+
+TEST(Demangle, LambdaWithExplicitTypeArgument) {
+ char tmp[100];
+
+ // Source:
+ //
+ // template <class T> T f(T t) {
+ // return []<class U>(U u) { return u + u; }(t);
+ // }
+ //
+ // template int f<int>(int);
+ //
+ // Full LLVM demangling of the lambda call operator:
+ //
+ // auto int f<int>(int)::'lambda'<typename $T>(int)::
+ // operator()<int>(int) const
+ ASSERT_TRUE(Demangle("_ZZ1fIiET_S0_ENKUlTyS0_E_clIiEEDaS0_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "f<>()::{lambda()#1}::operator()<>()");
+}
+
+TEST(Demangle, LambdaWithExplicitPackArgument) {
+ char tmp[100];
+
+ // Source:
+ //
+ // template <class T> T f(T t) {
+ // return []<class... U>(U... u) {
+ // return ((u + u) + ... + 0);
+ // }(t);
+ // }
+ //
+ // template int f<int>(int);
+ //
+ // Full LLVM demangling of the lambda call operator:
+ //
+ // auto int f<int>(int)::'lambda'<typename ...$T>($T...)::
+ // operator()<int>($T...) const
+ ASSERT_TRUE(Demangle("_ZZ1fIiET_S0_ENKUlTpTyDpT_E_clIJiEEEDaS2_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "f<>()::{lambda()#1}::operator()<>()");
+}
+
+// Lambdas defined inside a member function's default argument.  The scope is
+// mangled as `d [<number>] _`; per the expectations below, `d_` is printed as
+// {default arg#1} and `d0_` as {default arg#2}.
+TEST(Demangle, LambdaInClassMemberDefaultArgument) {
+ char tmp[100];
+
+ // Source:
+ //
+ // struct S {
+ // static auto f(void (*g)() = [] {}) { return g; }
+ // };
+ // void (*p)() = S::f();
+ //
+ // Full LLVM demangling of the lambda call operator:
+ //
+ // S::f(void (*)())::'lambda'()::operator()() const
+ //
+ // Full GNU binutils demangling:
+ //
+ // S::f(void (*)())::{default arg#1}::{lambda()#1}::operator()() const
+ ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd_NKUlvE_clEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()");
+
+ // The same but in the second rightmost default argument.
+ ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd0_NKUlvE_clEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "S::f()::{default arg#2}::{lambda()#1}::operator()()");
+
+ // Reject negative <(parameter) number> values.
+ ASSERT_FALSE(Demangle("_ZZN1S1fEPFvvEEdn1_NKUlvE_clEv", tmp, sizeof(tmp)));
+}
+
+// Boundary cases for the <number> in a default-argument scope
+// (`d <number> _`).  Per the expectations below, the printed index is
+// <number> + 2 when that sum fits in an int; in every other case tested here
+// the name still demangles, with the scope printed as {default arg#1}.
+TEST(Demangle, AvoidSignedOverflowForUnfortunateParameterNumbers) {
+ char tmp[100];
+
+ // Here <number> + 2 fits in an int, but just barely. (We expect no such
+ // input in practice: real functions don't have billions of arguments.)
+ ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483645_NKUlvE_clEv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp,
+ "S::f()::{default arg#2147483647}::{lambda()#1}::operator()()");
+
+ // Now <number> is an int, but <number> + 2 is not.
+ ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483646_NKUlvE_clEv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()");
+
+ // <number> is the largest int.
+ ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483647_NKUlvE_clEv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()");
+
+ // <number> itself does not fit into an int. ParseNumber truncates the value
+ // to int, yielding a large negative number, which we strain out.
+ ASSERT_TRUE(Demangle("_ZZN1S1fEPFvvEEd2147483648_NKUlvE_clEv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "S::f()::{default arg#1}::{lambda()#1}::operator()()");
+}
+
+TEST(Demangle, SubstpackNotationForTroublesomeTemplatePack) {
+ char tmp[100];
+
+ // Source:
+ //
+ // template <template <class> class, template <class> class> struct B {};
+ //
+ // template <template <class> class... T> struct A {
+ // template <template <class> class... U> void f(B<T, U>&&...) {}
+ // };
+ //
+ // template void A<>::f<>();
+ //
+ // LLVM can't demangle its own _SUBSTPACK_ notation.
+ ASSERT_TRUE(Demangle("_ZN1AIJEE1fIJEEEvDpO1BI_SUBSTPACK_T_E",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "A<>::f<>()");
+}
+
+TEST(Demangle, TemplateTemplateParamAppearingAsBackrefFollowedByTemplateArgs) {
+ char tmp[100];
+
+ // Source:
+ //
+ // template <template <class> class C> struct W {
+ // template <class T> static decltype(C<T>::m()) f() { return {}; }
+ // };
+ //
+ // template <class T> struct S { static int m() { return 0; } };
+ // template decltype(S<int>::m()) W<S>::f<int>();
+ ASSERT_TRUE(Demangle("_ZN1WI1SE1fIiEEDTclsrS0_IT_EE1mEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ(tmp, "W<>::f<>()");
+}
+
// Test corner cases of boundary conditions.
TEST(Demangle, CornerCases) {
char tmp[10];
@@ -95,6 +566,250 @@ TEST(Demangle, Clones) {
EXPECT_FALSE(Demangle("_ZL3Foov.isra.2.constprop.", tmp, sizeof(tmp)));
}
+TEST(Demangle, Discriminators) {
+  // Source:
+  //
+  // using Thunk = void (*)();
+  //
+  // Thunk* f() {
+  //   static Thunk thunks[12] = {};
+  //
+  // #define THUNK(i) [backslash here]
+  //   do { struct S { static void g() {} }; thunks[i] = &S::g; } while (0)
+  //
+  //   THUNK(0);
+  //   [... repeat for 1 to 10 ...]
+  //   THUNK(11);
+  //
+  //   return thunks;
+  // }
+  //
+  // Each entry below is the mangling of one of the S::g member functions.
+  // All of them are expected to demangle to the same text.
+  static const char* const kManglings[] = {
+      "_ZZ1fvEN1S1gEv",       // first: the discriminator is omitted
+      "_ZZ1fvEN1S1gE_0v",     // second: encodes 0
+      "_ZZ1fvEN1S1gE_9v",     // eleventh: encodes 9
+      "_ZZ1fvEN1S1gE__10_v",  // twelfth: encodes 10, delimited by underscores
+  };
+
+  char out[80];
+  for (const char* mangled : kManglings) {
+    EXPECT_TRUE(Demangle(mangled, out, sizeof(out)));
+    EXPECT_STREQ("f()::S::g()", out);
+  }
+}
+
+TEST(Demangle, SingleDigitDiscriminatorFollowedByADigit) {
+  // The discriminator is the single digit 9; 911 must not be read as a number.
+  constexpr char kMangled[] = "_ZZ1fvEN1S1gE_911return_type";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("f()::S::g()", out);
+}
+
+TEST(Demangle, LiteralOfGlobalNamespaceEnumType) {
+  // Full form: void f<(E)42>()
+  constexpr char kMangled[] = "_Z1fIL1E42EEvv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("f<>()", out);
+}
+
+TEST(Demangle, NullptrLiterals) {
+  // Two spellings of void f<nullptr>().
+  static const char* const kManglings[] = {
+      "_Z1fILDnEEvv",
+      "_Z1fILDn0EEvv",
+  };
+
+  char out[80];
+  for (const char* mangled : kManglings) {
+    EXPECT_TRUE(Demangle(mangled, out, sizeof(out)));
+    EXPECT_STREQ("f<>()", out);
+  }
+}
+
+TEST(Demangle, StringLiterals) {
+  // Full form: void f<"<char const [42]>">()
+  constexpr char kMangled[] = "_Z1fILA42_KcEEvv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("f<>()", out);
+}
+
+TEST(Demangle, ComplexFloatingPointLiterals) {
+ char tmp[80];
+
+ // Source (use g++ -fext-numeric-literals to compile):
+ //
+ // using C = double _Complex;
+ // template <class T> void f(char (&)[sizeof(C{sizeof(T)} + 4.0j)]) {}
+ // template void f<int>(char (&)[sizeof(C{sizeof(int)} + 4.0j)]);
+ //
+ // GNU demangling:
+ //
+ // void f<int>(char (&) [sizeof (double _Complex{sizeof (int)}+
+ // ((double _Complex)0000000000000000_4010000000000000))])
+ EXPECT_TRUE(Demangle(
+ "_Z1fIiEvRAszpltlCdstT_ELS0_0000000000000000_4010000000000000E_c",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, Float128) {
+  // Full form: S::operator _Float128() const
+  constexpr char kMangled[] = "_ZNK1ScvDF128_Ev";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator _Float128()", out);
+}
+
+TEST(Demangle, Float128x) {
+  // Full form: S::operator _Float128x() const
+  constexpr char kMangled[] = "_ZNK1ScvDF128xEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator _Float128x()", out);
+}
+
+TEST(Demangle, Bfloat16) {
+  // Full form: S::operator std::bfloat16_t() const
+  constexpr char kMangled[] = "_ZNK1ScvDF16bEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator std::bfloat16_t()", out);
+}
+
+TEST(Demangle, SimpleSignedBitInt) {
+  // Full form: S::operator _BitInt(256)() const
+  constexpr char kMangled[] = "_ZNK1ScvDB256_Ev";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator _BitInt(256)()", out);
+}
+
+TEST(Demangle, SimpleUnsignedBitInt) {
+  // Full form: S::operator unsigned _BitInt(256)() const
+  constexpr char kMangled[] = "_ZNK1ScvDU256_Ev";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator unsigned _BitInt(256)()", out);
+}
+
+TEST(Demangle, DependentBitInt) {
+  // Full form: S::operator _BitInt(256)<256>() const
+  //
+  // The width is a dependent expression here, and the expected output prints
+  // it as `?`.
+  constexpr char kMangled[] = "_ZNK1ScvDBT__ILi256EEEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator _BitInt(?)<>()", out);
+}
+
+TEST(Demangle, ConversionToPointerType) {
+  // Full form: S::operator int*() const
+  constexpr char kMangled[] = "_ZNK1ScvPiEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator int*()", out);
+}
+
+TEST(Demangle, ConversionToLvalueReferenceType) {
+  // Full form: S::operator int&() const
+  constexpr char kMangled[] = "_ZNK1ScvRiEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator int&()", out);
+}
+
+TEST(Demangle, ConversionToRvalueReferenceType) {
+  // Full form: S::operator int&&() const
+  constexpr char kMangled[] = "_ZNK1ScvOiEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator int&&()", out);
+}
+
+TEST(Demangle, ConversionToComplexFloatingPointType) {
+  // Full form: S::operator float _Complex() const
+  constexpr char kMangled[] = "_ZNK1ScvCfEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator float _Complex()", out);
+}
+
+TEST(Demangle, ConversionToImaginaryFloatingPointType) {
+  // Full form: S::operator float _Imaginary() const
+  constexpr char kMangled[] = "_ZNK1ScvGfEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator float _Imaginary()", out);
+}
+
+TEST(Demangle, ConversionToPointerToCvQualifiedType) {
+  // Full form: S::operator int const volatile restrict*() const
+  constexpr char kMangled[] = "_ZNK1ScvPrVKiEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator int const volatile restrict*()", out);
+}
+
+TEST(Demangle, ConversionToLayeredPointerType) {
+  // Full form: S::operator int const* const*() const
+  constexpr char kMangled[] = "_ZNK1ScvPKPKiEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator int const* const*()", out);
+}
+
+TEST(Demangle, ConversionToTypeWithExtendedQualifier) {
+  // Full form: S::operator int const AS128*() const
+  //
+  // The scan of easy type constructors stops at the extended qualifier, so
+  // the demangling keeps the * but drops the const.
+  constexpr char kMangled[] = "_ZNK1ScvPU5AS128KiEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::operator int*()", out);
+}
+
+TEST(Demangle, GlobalInitializers) {
+  // Every form below is expected to demangle to the same text.
+  static const char* const kManglings[] = {
+      "_ZGR1v",     // old form without suffix
+      "_ZGR1v_",    // modern form for the whole initializer
+      "_ZGR1v0_",   // next subobject in depth-first preorder traversal
+      "_ZGR1v1Z_",  // subobject with a larger seq-id
+  };
+
+  char out[80];
+  for (const char* mangled : kManglings) {
+    EXPECT_TRUE(Demangle(mangled, out, sizeof(out)));
+    EXPECT_STREQ("reference temporary for v", out);
+  }
+}
+
+// Structured-binding names (`DC <source-name>+ E`).  This test only checks
+// that both manglings are accepted; it does not compare the demangled text.
+TEST(Demangle, StructuredBindings) {
+ char tmp[80];
+
+ // Source:
+ //
+ // struct S { int a, b; };
+ // const auto& [x, y] = S{1, 2};
+
+ // [x, y]
+ EXPECT_TRUE(Demangle("_ZDC1x1yE", tmp, sizeof(tmp)));
+
+ // reference temporary for [x, y]
+ EXPECT_TRUE(Demangle("_ZGRDC1x1yE_", tmp, sizeof(tmp)));
+}
+
// Test the GNU abi_tag extension.
TEST(Demangle, AbiTags) {
char tmp[80];
@@ -119,6 +834,1078 @@ TEST(Demangle, AbiTags) {
EXPECT_STREQ("C[abi:bar][abi:foo]()", tmp);
}
+TEST(Demangle, SimpleGnuVectorSize) {
+ char tmp[80];
+
+ // Source:
+ //
+ // #define VECTOR(size) __attribute__((vector_size(size)))
+ // void f(int x VECTOR(32)) {}
+ //
+ // The attribute's size is a number of bytes. The compiler verifies that this
+ // value corresponds to a whole number of elements and emits the number of
+ // elements as a <number> in the mangling. With sizeof(int) == 4, that yields
+ // 32/4 = 8.
+ //
+ // LLVM demangling:
+ //
+ // f(int vector[8])
+ EXPECT_TRUE(Demangle("_Z1fDv8_i", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f()", tmp);
+}
+
+TEST(Demangle, GnuVectorSizeIsATemplateParameter) {
+ char tmp[80];
+
+ // Source:
+ //
+ // #define VECTOR(size) __attribute__((vector_size(size)))
+ // template <int n> void f(int x VECTOR(n)) {}
+ // template void f<32>(int x VECTOR(32));
+ //
+ // LLVM demangling:
+ //
+ // void f<32>(int vector[32])
+ //
+ // Because the size was dependent on a template parameter, it was encoded
+ // using the general expression encoding. Nothing in the mangling says how
+ // big the element type is, so the demangler is unable to show the element
+ // count 8 instead of the byte count 32. Arguably it would have been better
+ // to make the narrow production encode the byte count, so that nondependent
+ // and dependent versions of a 32-byte vector would both come out as
+ // vector[32].
+ EXPECT_TRUE(Demangle("_Z1fILi32EEvDvT__i", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, GnuVectorSizeIsADependentOperatorExpression) {
+ char tmp[80];
+
+ // Source:
+ //
+ // #define VECTOR(size) __attribute__((vector_size(size)))
+ // template <int n> void f(int x VECTOR(2 * n)) {}
+ // template void f<32>(int x VECTOR(2 * 32));
+ //
+ // LLVM demangling:
+ //
+ // void f<32>(int vector[2 * 32])
+ EXPECT_TRUE(Demangle("_Z1fILi32EEvDvmlLi2ET__i", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, SimpleAddressSpace) {
+ char tmp[80];
+
+ // Source:
+ //
+ // void f(const int __attribute__((address_space(128)))*) {}
+ //
+ // LLVM demangling:
+ //
+ // f(int const AS128*)
+ //
+ // Itanium ABI 5.1.5.1, "Qualified types", notes that address_space is mangled
+ // nonuniformly as a legacy exception: the number is part of the source-name
+ // if nondependent but is an expression in template-args if dependent. Thus
+ // it is a convenient test case for both forms.
+ EXPECT_TRUE(Demangle("_Z1fPU5AS128Ki", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f()", tmp);
+}
+
+TEST(Demangle, DependentAddressSpace) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <int n> void f (const int __attribute__((address_space(n)))*) {}
+ // template void f<128>(const int __attribute__((address_space(128)))*);
+ //
+ // LLVM demangling:
+ //
+ // void f<128>(int AS<128>*)
+ EXPECT_TRUE(Demangle("_Z1fILi128EEvPU2ASIT_Ei", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, TransactionSafeEntryPoint) {
+  // The `GTt` special name is expected to print as "transaction clone for".
+  constexpr char kMangled[] = "_ZGTt1fv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("transaction clone for f()", out);
+}
+
+TEST(Demangle, TransactionSafeFunctionType) {
+  // GNU demangling: f(void (*)() transaction_safe)
+  constexpr char kMangled[] = "_Z1fPDxFvvE";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("f()", out);
+}
+
+TEST(Demangle, TemplateParameterObject) {
+ char tmp[80];
+
+ // Source:
+ //
+ // struct S { int x, y; };
+ // template <S s, const S* p = &s> void f() {}
+ // template void f<S{1, 2}>();
+ //
+ // LLVM demangling:
+ //
+ // void f<S{1, 2}, &template parameter object for S{1, 2}>()
+ EXPECT_TRUE(Demangle("_Z1fIXtl1SLi1ELi2EEEXadL_ZTAXtlS0_Li1ELi2EEEEEEvv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // The name of the object standing alone.
+ //
+ // LLVM demangling: template parameter object for S{1, 2}
+ EXPECT_TRUE(Demangle("_ZTAXtl1SLi1ELi2EEE", tmp, sizeof(tmp)));
+ EXPECT_STREQ("template parameter object", tmp);
+}
+
+TEST(Demangle, EnableIfAttributeOnGlobalFunction) {
+ char tmp[80];
+
+ // int f(long l) __attribute__((enable_if(l >= 0, ""))) { return l; }
+ //
+ // f(long) [enable_if:fp >= 0]
+ EXPECT_TRUE(Demangle("_Z1fUa9enable_ifIXgefL0p_Li0EEEl", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f()", tmp);
+}
+
+TEST(Demangle, EnableIfAttributeOnNamespaceScopeFunction) {
+ char tmp[80];
+
+ // namespace ns {
+ // int f(long l) __attribute__((enable_if(l >= 0, ""))) { return l; }
+ // } // namespace ns
+ //
+ // ns::f(long) [enable_if:fp >= 0]
+ EXPECT_TRUE(Demangle("_ZN2ns1fEUa9enable_ifIXgefL0p_Li0EEEl",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("ns::f()", tmp);
+}
+
+TEST(Demangle, EnableIfAttributeOnFunctionTemplate) {
+ char tmp[80];
+
+ // template <class T>
+ // T f(T t) __attribute__((enable_if(t >= T{}, ""))) { return t; }
+ // template int f<int>(int);
+ //
+ // int f<int>(int) [enable_if:fp >= int{}]
+ EXPECT_TRUE(Demangle("_Z1fIiEUa9enable_ifIXgefL0p_tliEEET_S0_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ThisPointerInDependentSignature) {
+  // Full form: decltype(g<int>(this)) S::f<int>()
+  constexpr char kMangled[] = "_ZN1S1fIiEEDTcl1gIT_EfpTEEv";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("S::f<>()", out);
+}
+
+TEST(Demangle, DependentMemberOperatorCall) {
+  // Full form: decltype(fp.operator()()) f<C>(C)
+  constexpr char kMangled[] = "_Z1fI1CEDTcldtfp_onclEET_";
+
+  char out[80];
+  EXPECT_TRUE(Demangle(kMangled, out, sizeof(out)));
+  EXPECT_STREQ("f<>()", out);
+}
+
+TEST(Demangle, TypeNestedUnderDecltype) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <class T> struct S { using t = int; };
+ // template <class T> decltype(S<T>{})::t f() { return {}; }
+ // void g() { f<int>(); }
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(S<int>{})::t f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiENDTtl1SIT_EEE1tEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ElaboratedTypes) {
+  // Source:
+  //
+  // template <class T> struct S { class C {}; };
+  // template <class T> void f(class S<T>::C) {}
+  // template void f<int>(class S<int>::C);
+  //
+  // LLVM demangling:
+  //
+  // void f<int>(struct S<int>::C)
+  static const char* const kManglings[] = {
+      "_Z1fIiEvTsN1SIT_E1CE",  // the struct/class case shown above
+      "_Z1fIiEvTuN1SIT_E1CE",  // the like for unions
+      "_Z1fIiEvTeN1SIT_E1CE",  // the like for enums
+  };
+
+  char out[80];
+  for (const char* mangled : kManglings) {
+    EXPECT_TRUE(Demangle(mangled, out, sizeof(out)));
+    EXPECT_STREQ("f<>()", out);
+  }
+}
+
+// Exercises subobject-address (`so`) template arguments.
+TEST(Demangle, SubobjectAddresses) {
+  // Every mangling below is expected to demangle to the same text.
+  static const char* const kManglings[] = {
+      // void f<a.<char const at offset 123>>()
+      "_Z1fIXsoKcL_Z1aE123EEEvv",
+      // void f<&a.<char const at offset 0>>()
+      "_Z1fIXadsoKcL_Z1aEEEEvv",
+      // void f<&a.<char const at offset 123>>()
+      "_Z1fIXadsoKcL_Z1aE123EEEvv",
+      // void f<&a.<char const at offset 123>>(), past the end this time
+      "_Z1fIXadsoKcL_Z1aE123pEEEvv",
+      // void f<&a.<char const at offset 0>>() with union-selectors
+      "_Z1fIXadsoKcL_Z1aE__1_234EEEvv",
+      // void f<&a.<char const at offset 123>>(), past the end, with
+      // union-selector
+      "_Z1fIXadsoKcL_Z1aE123_456pEEEvv",
+  };
+
+  char out[80];
+  for (const char* mangled : kManglings) {
+    EXPECT_TRUE(Demangle(mangled, out, sizeof(out)));
+    EXPECT_STREQ("f<>()", out);
+  }
+}
+
+TEST(Demangle, Preincrement) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <class T> auto f(T t) -> decltype(T{++t}) { return t; }
+ // template auto f<int>(int t) -> decltype(int{++t});
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{++fp}) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_pp_fp_EES0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, Postincrement) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <class T> auto f(T t) -> decltype(T{t++}) { return t; }
+ // template auto f<int>(int t) -> decltype(int{t++});
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{fp++}) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_ppfp_EES0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, Predecrement) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <class T> auto f(T t) -> decltype(T{--t}) { return t; }
+ // template auto f<int>(int t) -> decltype(int{--t});
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{--fp}) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_mm_fp_EES0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, Postdecrement) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <class T> auto f(T t) -> decltype(T{t--}) { return t; }
+ // template auto f<int>(int t) -> decltype(int{t--});
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{fp--}) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_mmfp_EES0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, UnaryFoldExpressions) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <bool b> struct S {};
+ //
+ // template <class... T> auto f(T... t) -> S<((sizeof(T) == 4) || ...)> {
+ // return {};
+ // }
+ //
+ // void g() { f(1, 2L); }
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // S<((sizeof (int) == 4, sizeof (long) == 4) || ...)> f<int, long>(int, long)
+ EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXfrooeqstT_Li4EEEDpS1_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // The like with a left fold.
+ //
+ // S<(... || (sizeof (int) == 4, sizeof (long) == 4))> f<int, long>(int, long)
+ EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXflooeqstT_Li4EEEDpS1_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, BinaryFoldExpressions) {
+ char tmp[80];
+
+ // Source:
+ //
+ // template <bool b> struct S {};
+ //
+ // template <class... T> auto f(T... t)
+ // -> S<((sizeof(T) == 4) || ... || false)> {
+ // return {};
+ // }
+ //
+ // void g() { f(1, 2L); }
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // S<((sizeof (int) == 4, sizeof (long) == 4) || ... || false)>
+ // f<int, long>(int, long)
+ EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXfRooeqstT_Li4ELb0EEEDpS1_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // The like with a left fold.
+ //
+ // S<(false || ... || (sizeof (int) == 4, sizeof (long) == 4))>
+ // f<int, long>(int, long)
+ EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXfLooLb0EeqstT_Li4EEEDpS1_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, SizeofPacks) {
+ char tmp[80];
+
+ // template <size_t i> struct S {};
+ //
+ // template <class... T> auto f(T... p) -> S<sizeof...(T)> { return {}; }
+ // template auto f<int, long>(int, long) -> S<2>;
+ //
+ // template <class... T> auto g(T... p) -> S<sizeof...(p)> { return {}; }
+ // template auto g<int, long>(int, long) -> S<2>;
+
+ // S<sizeof...(int, long)> f<int, long>(int, long)
+ EXPECT_TRUE(Demangle("_Z1fIJilEE1SIXsZT_EEDpT_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // S<sizeof... (fp)> g<int, long>(int, long)
+ EXPECT_TRUE(Demangle("_Z1gIJilEE1SIXsZfp_EEDpT_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("g<>()", tmp);
+}
+
+TEST(Demangle, SizeofPackInvolvingAnAliasTemplate) {
+ char tmp[80];
+
+ // Source (the array bound is mangled with "sP" <template-arg>* "E"):
+ //
+ // template <class... T> using A = char[sizeof...(T)];
+ // template <class... U> void f(const A<U..., int>&) {}
+ // template void f<int>(const A<int, int>&);
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // void f<int>(char const (&) [sizeof... (int, int)])
+ EXPECT_TRUE(Demangle("_Z1fIJiEEvRAsPDpT_iE_Kc", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, UserDefinedLiteral) {
+ char tmp[80];
+
+ // Source (a literal operator is mangled as "li" <source-name>):
+ //
+ // unsigned long long operator""_lit(unsigned long long x) { return x; }
+ //
+ // LLVM demangling:
+ //
+ // operator"" _lit(unsigned long long)
+ EXPECT_TRUE(Demangle("_Zli4_lity", tmp, sizeof(tmp)));
+ EXPECT_STREQ("operator\"\" _lit()", tmp);
+}
+
+TEST(Demangle, Spaceship) {
+ char tmp[80];
+
+ // #include <compare>
+ //
+ // struct S { auto operator<=>(const S&) const = default; };
+ // auto (S::*f) = &S::operator<=>; // make sure S::operator<=> is emitted
+ //
+ // template <class T> auto g(T x, T y) -> decltype(x <=> y) {
+ // return x <=> y;
+ // }
+ // template auto g<S>(S x, S y) -> decltype(x <=> y);
+
+ // "ss" as an operator name: S::operator<=>(S const&) const
+ EXPECT_TRUE(Demangle("_ZNK1SssERKS_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("S::operator<=>()", tmp);
+
+ // "ss" as a binary expression: decltype(fp <=> fp0) g<S>(S, S)
+ EXPECT_TRUE(Demangle("_Z1gI1SEDTssfp_fp0_ET_S2_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("g<>()", tmp);
+}
+
+TEST(Demangle, CoAwait) {
+ char tmp[80];
+
+ // "aw" is operator co_await: ns::Awaitable::operator co_await() const
+ EXPECT_TRUE(Demangle("_ZNK2ns9AwaitableawEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("ns::Awaitable::operator co_await()", tmp);
+}
+
+TEST(Demangle, VendorExtendedExpressions) {
+ char tmp[80];
+
+ // "u" <source-name> "E" with no arguments: void f<__e()>()
+ EXPECT_TRUE(Demangle("_Z1fIXu3__eEEEvv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // The like with two type arguments: void f<__e(int, long)>()
+ EXPECT_TRUE(Demangle("_Z1fIXu3__eilEEEvv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, DirectListInitialization) {
+ char tmp[80];
+
+ // template <class T> decltype(T{}) f() { return T{}; }
+ // template decltype(int{}) f<int>();
+ //
+ // struct XYZ { int x, y, z; };
+ // template <class T> decltype(T{1, 2, 3}) g() { return T{1, 2, 3}; }
+ // template decltype(XYZ{1, 2, 3}) g<XYZ>();
+ //
+ // template <class T> decltype(T{.x = 1, .y = 2, .z = 3}) h() {
+ // return T{.x = 1, .y = 2, .z = 3};
+ // }
+ // template decltype(XYZ{.x = 1, .y = 2, .z = 3}) h<XYZ>();
+ //
+ // // The following two cases require full C99 designated initializers,
+ // // not part of C++ but likely available as an extension if you ask your
+ // // compiler nicely.
+ //
+ // struct A { int a[4]; };
+ // template <class T> decltype(T{.a[2] = 42}) i() { return T{.a[2] = 42}; }
+ // template decltype(A{.a[2] = 42}) i<A>();
+ //
+ // template <class T> decltype(T{.a[1 ... 3] = 42}) j() {
+ // return T{.a[1 ... 3] = 42};
+ // }
+ // template decltype(A{.a[1 ... 3] = 42}) j<A>();
+
+ // "tl" <type> with an empty list: decltype(int{}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_EEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // "tl" with three literals: decltype(XYZ{1, 2, 3}) g<XYZ>()
+ EXPECT_TRUE(Demangle("_Z1gI3XYZEDTtlT_Li1ELi2ELi3EEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("g<>()", tmp);
+
+ // "di" designated fields: decltype(XYZ{.x = 1, .y = 2, .z = 3}) h<XYZ>()
+ EXPECT_TRUE(Demangle("_Z1hI3XYZEDTtlT_di1xLi1Edi1yLi2Edi1zLi3EEEv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("h<>()", tmp);
+
+ // "dx" designated array index: decltype(A{.a[2] = 42}) i<A>()
+ EXPECT_TRUE(Demangle("_Z1iI1AEDTtlT_di1adxLi2ELi42EEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("i<>()", tmp);
+
+ // "dX" designated array range: decltype(A{.a[1 ... 3] = 42}) j<A>()
+ EXPECT_TRUE(Demangle("_Z1jI1AEDTtlT_di1adXLi1ELi3ELi42EEEv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("j<>()", tmp);
+}
+
+TEST(Demangle, SimpleInitializerLists) {
+ char tmp[80];
+
+ // Common preamble of source-code examples in this test function:
+ //
+ // #include <initializer_list>
+ //
+ // template <class T> void g(std::initializer_list<T>) {}
+
+ // Source (empty untyped braced list, mangled as "il" "E"):
+ //
+ // template <class T> auto f() -> decltype(g<T>({})) {}
+ // template auto f<int>() -> decltype(g<int>({}));
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(g<int>({})) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gIT_EilEEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // Source (one-element list):
+ //
+ // template <class T> auto f(T x) -> decltype(g({x})) {}
+ // template auto f<int>(int x) -> decltype(g({x}));
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(g({fp})) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gilfp_EEET_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // Source (two-element list):
+ //
+ // template <class T> auto f(T x, T y) -> decltype(g({x, y})) {}
+ // template auto f<int>(int x, int y) -> decltype(g({x, y}));
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(g({fp, fp0})) f<int>(int, int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gilfp_fp0_EEET_S1_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, BracedListImplicitlyConstructingAClassObject) {
+ char tmp[80];
+
+ // Source (untyped braced list "il" holding a designated initializer "di"):
+ //
+ // struct S { int v; };
+ // void g(S) {}
+ // template <class T> auto f(T x) -> decltype(g({.v = x})) {}
+ // template auto f<int>(int x) -> decltype(g({.v = x}));
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(g({.v = fp})) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTcl1gildi1vfp_EEET_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, SimpleNewExpression) {
+ char tmp[80];
+
+ // Source ("nw" with no placement arguments and no initializer):
+ //
+ // template <class T> decltype(T{*new T}) f() { return T{}; }
+ // template decltype(int{*new int}) f<int>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(new int)}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_EEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, NewExpressionWithEmptyParentheses) {
+ char tmp[80];
+
+ // Source ("nw" with an empty parenthesized initializer, "pi" "E"):
+ //
+ // template <class T> decltype(T{*new T()}) f() { return T{}; }
+ // template decltype(int{*new int()}) f<int>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(new int)}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_piEEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, NewExpressionWithNonemptyParentheses) {
+ char tmp[80];
+
+ // Source ("nw" with a one-argument parenthesized initializer, "pi"):
+ //
+ // template <class T> decltype(T{*new T(42)}) f() { return T{}; }
+ // template decltype(int{*new int(42)}) f<int>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(new int(42))}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_piLi42EEEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, PlacementNewExpression) {
+ char tmp[80];
+
+ // Source ("nw" with a placement argument before the "_" separator):
+ //
+ // #include <new>
+ //
+ // template <class T> auto f(T t) -> decltype(T{*new (&t) T(42)}) {
+ // return t;
+ // }
+ // template auto f<int>(int t) -> decltype(int{*new (&t) int(42)});
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(new(&fp) int(42))}) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denwadfp__S0_piLi42EEEES0_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, GlobalScopeNewExpression) {
+ char tmp[80];
+
+ // Source (the "gs" prefix on "nw" marks the global-scope ::new):
+ //
+ // template <class T> decltype(T{*::new T}) f() { return T{}; }
+ // template decltype(int{*::new int}) f<int>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(::new int)}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_degsnw_S0_EEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, NewExpressionWithEmptyBraces) {
+ char tmp[80];
+
+ // Source ("nw" with an empty braced initializer, "il" "E"):
+ //
+ // template <class T> decltype(T{*new T{}}) f() { return T{}; }
+ // template decltype(int{*new int{}}) f<int>();
+ //
+ // GNU demangling:
+ //
+ // decltype (int{*(new int{})}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_ilEEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, NewExpressionWithNonemptyBraces) {
+ char tmp[80];
+
+ // Source ("nw" with a one-element braced initializer, "il"):
+ //
+ // template <class T> decltype(T{*new T{42}}) f() { return T{}; }
+ // template decltype(int{*new int{42}}) f<int>();
+ //
+ // GNU demangling:
+ //
+ // decltype (int{*(new int{42})}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denw_S0_ilLi42EEEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, SimpleArrayNewExpression) {
+ char tmp[80];
+
+ // Source ("na" is the array form of a new-expression):
+ //
+ // template <class T> decltype(T{*new T[1]}) f() { return T{}; }
+ // template decltype(int{*new int[1]}) f<int>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(new[] int)}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_dena_S0_EEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ArrayNewExpressionWithEmptyParentheses) {
+ char tmp[80];
+
+ // Source ("na" with an empty parenthesized initializer, "pi" "E"):
+ //
+ // template <class T> decltype(T{*new T[1]()}) f() { return T{}; }
+ // template decltype(int{*new int[1]()}) f<int>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(new[] int)}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_dena_S0_piEEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ArrayPlacementNewExpression) {
+ char tmp[80];
+
+ // Source ("na" with a placement argument before the "_" separator):
+ //
+ // #include <new>
+ //
+ // template <class T> auto f(T t) -> decltype(T{*new (&t) T[1]}) {
+ // return T{};
+ // }
+ // template auto f<int>(int t) -> decltype(int{*new (&t) int[1]});
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(new[](&fp) int)}) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_denaadfp__S0_EEES0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, GlobalScopeArrayNewExpression) {
+ char tmp[80];
+
+ // Source (the "gs" prefix on "na" marks the global-scope ::new[]):
+ //
+ // template <class T> decltype(T{*::new T[1]}) f() { return T{}; }
+ // template decltype(int{*::new int[1]}) f<int>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(int{*(::new[] int)}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_degsna_S0_EEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ArrayNewExpressionWithTwoElementsInBraces) {
+ char tmp[80];
+
+ // Source ("na" with a two-element braced initializer, "il"):
+ //
+ // template <class T> decltype(T{*new T[2]{1, 2}}) f() { return T{}; }
+ // template decltype(int{*new int[2]{1, 2}}) f<int>();
+ //
+ // GNU demangling:
+ //
+ // decltype (int{*(new int{1, 2})}) f<int>()
+ EXPECT_TRUE(Demangle("_Z1fIiEDTtlT_dena_S0_ilLi1ELi2EEEEv",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, SimpleDeleteExpression) {
+ char tmp[80];
+
+ // Source ("dl" <expression> is a delete-expression):
+ //
+ // template <class T> auto f(T* p) -> decltype(delete p) {}
+ // template auto f<int>(int* p) -> decltype(delete p);
+ //
+ // LLVM demangling:
+ //
+ // decltype(delete fp) f<int>(int*)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTdlfp_EPT_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, GlobalScopeDeleteExpression) {
+ char tmp[80];
+
+ // Source (the "gs" prefix on "dl" marks the global-scope ::delete):
+ //
+ // template <class T> auto f(T* p) -> decltype(::delete p) {}
+ // template auto f<int>(int* p) -> decltype(::delete p);
+ //
+ // LLVM demangling:
+ //
+ // decltype(::delete fp) f<int>(int*)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTgsdlfp_EPT_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, SimpleArrayDeleteExpression) {
+ char tmp[80];
+
+ // Source ("da" <expression> is the array form, delete[]):
+ //
+ // template <class T> auto f(T* a) -> decltype(delete[] a) {}
+ // template auto f<int>(int* a) -> decltype(delete[] a);
+ //
+ // LLVM demangling:
+ //
+ // decltype(delete[] fp) f<int>(int*)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTdafp_EPT_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, GlobalScopeArrayDeleteExpression) {
+ char tmp[80];
+
+ // Source (the "gs" prefix on "da" marks the global-scope ::delete[]):
+ //
+ // template <class T> auto f(T* a) -> decltype(::delete[] a) {}
+ // template auto f<int>(int* a) -> decltype(::delete[] a);
+ //
+ // LLVM demangling:
+ //
+ // decltype(::delete[] fp) f<int>(int*)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTgsdafp_EPT_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ReferenceQualifiedFunctionTypes) {
+ char tmp[80];
+
+ // "R" just before "E" is the & ref-qualifier: void f(void (*)() const &, int)
+ EXPECT_TRUE(Demangle("_Z1fPKFvvREi", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f()", tmp);
+
+ // "O" just before "E" is the && ref-qualifier: void f(void (*)() &&, int)
+ EXPECT_TRUE(Demangle("_Z1fPFvvOEi", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f()", tmp);
+
+ // A reference parameter followed by "R": void f(void (*)(int&) &, int)
+ EXPECT_TRUE(Demangle("_Z1fPFvRiREi", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f()", tmp);
+
+ // An rvalue-reference parameter followed by "O": void f(void (*)(S&&) &&, int)
+ EXPECT_TRUE(Demangle("_Z1fPFvO1SOEi", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f()", tmp);
+}
+
+TEST(Demangle, DynamicCast) {
+ char tmp[80];
+
+ // Source ("dc" <type> <expression> is dynamic_cast):
+ //
+ // template <class T> auto f(T* p) -> decltype(dynamic_cast<const T*>(p)) {
+ // return p;
+ // }
+ // struct S {};
+ // void g(S* p) { f(p); }
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(dynamic_cast<S const*>(fp)) f<S>(S*)
+ EXPECT_TRUE(Demangle("_Z1fI1SEDTdcPKT_fp_EPS1_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, StaticCast) {
+ char tmp[80];
+
+ // Source ("sc" <type> <expression> is static_cast):
+ //
+ // template <class T> auto f(T* p) -> decltype(static_cast<const T*>(p)) {
+ // return p;
+ // }
+ // void g(int* p) { f(p); }
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(static_cast<int const*>(fp)) f<int>(int*)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTscPKT_fp_EPS0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ConstCast) {
+ char tmp[80];
+
+ // Source ("cc" <type> <expression> is const_cast):
+ //
+ // template <class T> auto f(T* p) -> decltype(const_cast<const T*>(p)) {
+ // return p;
+ // }
+ // void g(int* p) { f(p); }
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(const_cast<int const*>(fp)) f<int>(int*)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTccPKT_fp_EPS0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ReinterpretCast) {
+ char tmp[80];
+
+ // Source ("rc" <type> <expression> is reinterpret_cast):
+ //
+ // template <class T> auto f(T* p)
+ // -> decltype(reinterpret_cast<const T*>(p)) {
+ // return p;
+ // }
+ // void g(int* p) { f(p); }
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(reinterpret_cast<int const*>(fp)) f<int>(int*)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTrcPKT_fp_EPS0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, TypeidType) {
+ char tmp[80];
+
+ // Source ("ti" <type> is typeid applied to a type):
+ //
+ // #include <typeinfo>
+ //
+ // template <class T> decltype(typeid(T).name()) f(T) { return nullptr; }
+ // template decltype(typeid(int).name()) f<int>(int);
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(typeid (int).name()) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTcldttiT_4nameEES0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, TypeidExpression) {
+ char tmp[80];
+
+ // Source ("te" <expression> is typeid applied to an expression):
+ //
+ // #include <typeinfo>
+ //
+ // template <class T> decltype(typeid(T{}).name()) f(T) { return nullptr; }
+ // template decltype(typeid(int{}).name()) f<int>(int);
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(typeid (int{}).name()) f<int>(int)
+ EXPECT_TRUE(Demangle("_Z1fIiEDTcldttetlT_E4nameEES0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, AlignofType) {
+ char tmp[80];
+
+ // Source ("at" <type> is alignof applied to a type):
+ //
+ // template <class T> T f(T (&a)[alignof(T)]) { return a[0]; }
+ // template int f<int>(int (&)[alignof(int)]);
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // int f<int>(int (&) [alignof (int)])
+ EXPECT_TRUE(Demangle("_Z1fIiET_RAatS0__S0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, AlignofExpression) {
+ char tmp[80];
+
+ // Source (note that this uses a GNU extension; it is not standard C++):
+ //
+ // template <class T> T f(T (&a)[alignof(T{})]) { return a[0]; }
+ // template int f<int>(int (&)[alignof(int{})]);
+ //
+ // Full LLVM demangling of the instantiation of f ("az" <expression>):
+ //
+ // int f<int>(int (&) [alignof (int{})])
+ EXPECT_TRUE(Demangle("_Z1fIiET_RAaztlS0_E_S0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, NoexceptExpression) {
+ char tmp[80];
+
+ // Source ("nx" <expression> is the noexcept operator):
+ //
+ // template <class T> void f(T (&a)[noexcept(T{})]) {}
+ // template void f<int>(int (&)[noexcept(int{})]);
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // void f<int>(int (&) [noexcept (int{})])
+ EXPECT_TRUE(Demangle("_Z1fIiEvRAnxtlT_E_S0_", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, UnaryThrow) {
+ char tmp[80];
+
+ // Source ("tw" <expression> is a throw with an operand):
+ //
+ // template <bool b> decltype(b ? throw b : 0) f() { return 0; }
+ // template decltype(false ? throw false : 0) f<false>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(false ? throw false : 0) f<false>()
+ EXPECT_TRUE(Demangle("_Z1fILb0EEDTquT_twT_Li0EEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, NullaryThrow) {
+ char tmp[80];
+
+ // Source ("tr" is a throw with no operand):
+ //
+ // template <bool b> decltype(b ? throw : 0) f() { return 0; }
+ // template decltype(false ? throw : 0) f<false>();
+ //
+ // Full LLVM demangling of the instantiation of f:
+ //
+ // decltype(false ? throw : 0) f<false>()
+ EXPECT_TRUE(Demangle("_Z1fILb0EEDTquT_trLi0EEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+}
+
+TEST(Demangle, ThreadLocalWrappers) {
+ char tmp[80];
+ // "TW" <name> is described rather than spelled as a C++ declaration.
+ EXPECT_TRUE(Demangle("_ZTWN2ns3varE", tmp, sizeof(tmp)));
+ EXPECT_STREQ("thread-local wrapper routine for ns::var", tmp);
+ // The like for "TH" <name>.
+ EXPECT_TRUE(Demangle("_ZTHN2ns3varE", tmp, sizeof(tmp)));
+ EXPECT_STREQ("thread-local initialization routine for ns::var", tmp);
+}
+
+TEST(Demangle, DubiousSrStSymbols) {
+ char tmp[80];
+
+ // GNU demangling of a name containing "srSt" (not accepted by LLVM):
+ //
+ // S<std::u<char>::v> f<char>()
+ EXPECT_TRUE(Demangle("_Z1fIcE1SIXsrSt1uIT_E1vEEv", tmp, sizeof(tmp)));
+ EXPECT_STREQ("f<>()", tmp);
+
+ // A real case from the wild.
+ //
+ // GNU demangling (not accepted by LLVM) with line breaks and indentation
+ // added for readability:
+ //
+ // __gnu_cxx::__enable_if<std::__is_char<char>::__value, bool>::__type
+ // std::operator==<char>(
+ // std::__cxx11::basic_string<char, std::char_traits<char>,
+ // std::allocator<char> > const&,
+ // std::__cxx11::basic_string<char, std::char_traits<char>,
+ // std::allocator<char> > const&)
+ EXPECT_TRUE(Demangle(
+ "_ZSteqIcEN9__gnu_cxx11__enable_if"
+ "IXsrSt9__is_charIT_E7__valueEbE"
+ "6__typeE"
+ "RKNSt7__cxx1112basic_stringIS3_St11char_traitsIS3_ESaIS3_EEESE_",
+ tmp, sizeof(tmp)));
+ EXPECT_STREQ("std::operator==<>()", tmp);
+}
+
+// Test one Rust symbol to exercise Demangle's delegation path. Rust demangling
+// itself is more thoroughly tested in demangle_rust_test.cc.
+TEST(Demangle, DelegatesToDemangleRustSymbolEncoding) {
+ char tmp[80];
+ // The input starts with "_R" rather than the "_Z" used by the C++ cases.
+ EXPECT_TRUE(Demangle("_RNvC8my_crate7my_func", tmp, sizeof(tmp)));
+ EXPECT_STREQ("my_crate::my_func", tmp);
+}
+
// Tests that verify that Demangle footprint is within some limit.
// They are not to be run under sanitizers as the sanitizers increase
// stack consumption by about 4x.
diff --git a/absl/debugging/internal/elf_mem_image.cc b/absl/debugging/internal/elf_mem_image.cc
index 42dcd3cd..2c168309 100644
--- a/absl/debugging/internal/elf_mem_image.cc
+++ b/absl/debugging/internal/elf_mem_image.cc
@@ -20,8 +20,11 @@
#ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
#include <string.h>
+
#include <cassert>
#include <cstddef>
+#include <cstdint>
+
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
@@ -86,20 +89,14 @@ ElfMemImage::ElfMemImage(const void *base) {
Init(base);
}
-int ElfMemImage::GetNumSymbols() const {
- if (!hash_) {
- return 0;
- }
- // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
- return static_cast<int>(hash_[1]);
-}
+uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; }  // num_syms_ is computed once, in Init().
-const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
+const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const {
ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");  // index is unsigned, so one upper-bound test suffices.
return dynsym_ + index;  // dynsym_ comes from the DT_SYMTAB dynamic entry.
}
-const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
+const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const {
ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");  // Same bound as GetDynsym(); index is unsigned.
return versym_ + index;  // Indexed in parallel with dynsym_ (same index, same bound).
}
@@ -154,7 +151,7 @@ void ElfMemImage::Init(const void *base) {
dynstr_ = nullptr;
versym_ = nullptr;
verdef_ = nullptr;
- hash_ = nullptr;
+ num_syms_ = 0;
strsize_ = 0;
verdefnum_ = 0;
// Sentinel: PT_LOAD .p_vaddr can't possibly be this.
@@ -219,12 +216,17 @@ void ElfMemImage::Init(const void *base) {
base_as_char - reinterpret_cast<const char *>(link_base_);
ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
+ uint32_t *sysv_hash = nullptr;
+ uint32_t *gnu_hash = nullptr;
for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
const auto value =
static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
switch (dynamic_entry->d_tag) {
case DT_HASH:
- hash_ = reinterpret_cast<ElfW(Word) *>(value);
+ sysv_hash = reinterpret_cast<uint32_t *>(value);
+ break;
+ case DT_GNU_HASH:
+ gnu_hash = reinterpret_cast<uint32_t *>(value);
break;
case DT_SYMTAB:
dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
@@ -249,13 +251,38 @@ void ElfMemImage::Init(const void *base) {
break;
}
}
- if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
+ if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ ||
!verdef_ || !verdefnum_ || !strsize_) {
assert(false); // invalid VDSO
// Mark this image as not present. Can not recur infinitely.
Init(nullptr);
return;
}
+ if (sysv_hash) {
+ num_syms_ = sysv_hash[1];
+ } else {
+ assert(gnu_hash);
+ // Compute the number of symbols for DT_GNU_HASH, which is specified by
+ // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt
+ uint32_t nbuckets = gnu_hash[0];
+ // The buckets array is located after the header (4 uint32) and the bloom
+ // filter (size_t array of gnu_hash[2] elements).
+ uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
+ // Find the chain of the last non-empty bucket.
+ uint32_t idx = 0;
+ for (uint32_t i = nbuckets; i > 0;) {
+ idx = buckets[--i];
+ if (idx != 0) break;
+ }
+ if (idx != 0) {
+ // Find the last element of the chain, which has an odd value.
+ // Add one to get the number of symbols.
+ uint32_t *chain = buckets + nbuckets - gnu_hash[1];
+ while (chain[idx++] % 2 == 0) {
+ }
+ }
+ num_syms_ = idx;
+ }
}
bool ElfMemImage::LookupSymbol(const char *name,
@@ -300,9 +327,9 @@ bool ElfMemImage::LookupSymbolByAddress(const void *address,
return false;
}
-ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
- : index_(index), image_(image) {
-}
+ElfMemImage::SymbolIterator::SymbolIterator(const void *const image,
+ uint32_t index)  // index is unsigned to match GetNumSymbols().
+ : index_(index), image_(image) {}  // image is kept as an opaque pointer; Update() casts it back to ElfMemImage.
const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
return &info_;
@@ -335,7 +362,7 @@ ElfMemImage::SymbolIterator ElfMemImage::end() const {
return SymbolIterator(this, GetNumSymbols());
}
-void ElfMemImage::SymbolIterator::Update(int increment) {
+void ElfMemImage::SymbolIterator::Update(uint32_t increment) {
const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
if (!image->IsPresent()) {
diff --git a/absl/debugging/internal/elf_mem_image.h b/absl/debugging/internal/elf_mem_image.h
index e7fe6ab0..19c4952e 100644
--- a/absl/debugging/internal/elf_mem_image.h
+++ b/absl/debugging/internal/elf_mem_image.h
@@ -22,6 +22,7 @@
// Including this will define the __GLIBC__ macro if glibc is being
// used.
#include <climits>
+#include <cstdint>
#include "absl/base/config.h"
@@ -82,10 +83,10 @@ class ElfMemImage {
bool operator!=(const SymbolIterator &rhs) const;
bool operator==(const SymbolIterator &rhs) const;
private:
- SymbolIterator(const void *const image, int index);
- void Update(int incr);
+ SymbolIterator(const void *const image, uint32_t index);
+ void Update(uint32_t incr);
SymbolInfo info_;
- int index_;
+ uint32_t index_;
const void *const image_;
};
@@ -94,14 +95,14 @@ class ElfMemImage {
void Init(const void *base);
bool IsPresent() const { return ehdr_ != nullptr; }
const ElfW(Phdr)* GetPhdr(int index) const;
- const ElfW(Sym)* GetDynsym(int index) const;
- const ElfW(Versym)* GetVersym(int index) const;
+ const ElfW(Sym) * GetDynsym(uint32_t index) const;
+ const ElfW(Versym)* GetVersym(uint32_t index) const;
const ElfW(Verdef)* GetVerdef(int index) const;
const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const;
const char* GetDynstr(ElfW(Word) offset) const;
const void* GetSymAddr(const ElfW(Sym) *sym) const;
const char* GetVerstr(ElfW(Word) offset) const;
- int GetNumSymbols() const;
+ uint32_t GetNumSymbols() const;
SymbolIterator begin() const;
SymbolIterator end() const;
@@ -124,8 +125,8 @@ class ElfMemImage {
const ElfW(Sym) *dynsym_;
const ElfW(Versym) *versym_;
const ElfW(Verdef) *verdef_;
- const ElfW(Word) *hash_;
const char *dynstr_;
+ uint32_t num_syms_;
size_t strsize_;
size_t verdefnum_;
ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD).
diff --git a/absl/debugging/internal/stacktrace_aarch64-inl.inc b/absl/debugging/internal/stacktrace_aarch64-inl.inc
index 1caf7bbe..b123479b 100644
--- a/absl/debugging/internal/stacktrace_aarch64-inl.inc
+++ b/absl/debugging/internal/stacktrace_aarch64-inl.inc
@@ -89,6 +89,8 @@ struct StackInfo {
static bool InsideSignalStack(void** ptr, const StackInfo* stack_info) {
uintptr_t comparable_ptr = reinterpret_cast<uintptr_t>(ptr);  // Compare addresses as integers.
+ if (stack_info->sig_stack_high == kUnknownStackEnd)  // NOTE(review): presumably the "bounds not known" sentinel - confirm.
+ return false;
return (comparable_ptr >= stack_info->sig_stack_low &&
comparable_ptr < stack_info->sig_stack_high);  // Half-open range [sig_stack_low, sig_stack_high).
}
@@ -122,13 +124,6 @@ static void **NextStackFrame(void **old_frame_pointer, const void *uc,
if (pre_signal_frame_pointer >= old_frame_pointer) {
new_frame_pointer = pre_signal_frame_pointer;
}
- // Check that alleged frame pointer is actually readable. This is to
- // prevent "double fault" in case we hit the first fault due to e.g.
- // stack corruption.
- if (!absl::debugging_internal::AddressIsReadable(
- new_frame_pointer))
- return nullptr;
- }
}
#endif
@@ -136,6 +131,14 @@ static void **NextStackFrame(void **old_frame_pointer, const void *uc,
if ((reinterpret_cast<uintptr_t>(new_frame_pointer) & 7) != 0)
return nullptr;
+ // Check that alleged frame pointer is actually readable. This is to
+ // prevent "double fault" in case we hit the first fault due to e.g.
+ // stack corruption.
+ if (!absl::debugging_internal::AddressIsReadable(
+ new_frame_pointer))
+ return nullptr;
+ }
+
// Only check the size if both frames are in the same stack.
if (InsideSignalStack(new_frame_pointer, stack_info) ==
InsideSignalStack(old_frame_pointer, stack_info)) {
diff --git a/absl/debugging/internal/utf8_for_code_point.cc b/absl/debugging/internal/utf8_for_code_point.cc
new file mode 100644
index 00000000..658a3b51
--- /dev/null
+++ b/absl/debugging/internal/utf8_for_code_point.cc
@@ -0,0 +1,70 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/utf8_for_code_point.h"
+
+#include <cstdint>
+
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+namespace {
+
+// UTF-8 encoding bounds.
+constexpr uint32_t kMinSurrogate = 0xd800, kMaxSurrogate = 0xdfff;  // Rejected by the constructor.
+constexpr uint32_t kMax1ByteCodePoint = 0x7f;  // 7 payload bits.
+constexpr uint32_t kMax2ByteCodePoint = 0x7ff;  // 5 + 6 payload bits.
+constexpr uint32_t kMax3ByteCodePoint = 0xffff;  // 4 + 6 + 6 payload bits.
+constexpr uint32_t kMaxCodePoint = 0x10ffff;  // Anything larger is rejected.
+
+} // namespace
+
+Utf8ForCodePoint::Utf8ForCodePoint(uint64_t code_point) {
+ if (code_point <= kMax1ByteCodePoint) {  // One byte: 0xxxxxxx.
+ length = 1;
+ bytes[0] = static_cast<char>(code_point);
+ return;
+ }
+
+ if (code_point <= kMax2ByteCodePoint) {  // Two bytes: 110xxxxx 10xxxxxx.
+ length = 2;
+ bytes[0] = static_cast<char>(0xc0 | (code_point >> 6));
+ bytes[1] = static_cast<char>(0x80 | (code_point & 0x3f));
+ return;
+ }
+
+ if (kMinSurrogate <= code_point && code_point <= kMaxSurrogate) return;  // Rejected: length is never assigned.
+
+ if (code_point <= kMax3ByteCodePoint) {  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
+ length = 3;
+ bytes[0] = static_cast<char>(0xe0 | (code_point >> 12));
+ bytes[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
+ bytes[2] = static_cast<char>(0x80 | (code_point & 0x3f));
+ return;
+ }
+
+ if (code_point > kMaxCodePoint) return;  // Rejected: length is never assigned.
+
+ length = 4;  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+ bytes[0] = static_cast<char>(0xf0 | (code_point >> 18));
+ bytes[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f));
+ bytes[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
+ bytes[3] = static_cast<char>(0x80 | (code_point & 0x3f));
+}
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/utf8_for_code_point.h b/absl/debugging/internal/utf8_for_code_point.h
new file mode 100644
index 00000000..f23cde6d
--- /dev/null
+++ b/absl/debugging/internal/utf8_for_code_point.h
@@ -0,0 +1,47 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_
+#define ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_
+
+#include <cstdint>
+
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+struct Utf8ForCodePoint {
+ // Converts a Unicode code point to the corresponding UTF-8 byte sequence.
+ // Async-signal-safe to support use in symbolizing stack traces from a signal
+ // handler.
+ explicit Utf8ForCodePoint(uint64_t code_point);
+
+ // Returns true if the constructor's code_point argument was valid.
+ bool ok() const { return length != 0; }
+
+ // If code_point was in range, then 1 <= length <= 4, and the UTF-8 encoding
+ // is found in bytes[0 .. (length - 1)]. If code_point was invalid, then
+ // length == 0. In either case, the contents of bytes[length .. 3] are
+ // unspecified.
+ char bytes[4] = {};  // No room for a terminating NUL when length == 4.
+ uint32_t length = 0;  // Stays 0 unless the constructor accepts code_point.
+};
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_
diff --git a/absl/debugging/internal/utf8_for_code_point_test.cc b/absl/debugging/internal/utf8_for_code_point_test.cc
new file mode 100644
index 00000000..dd0591ae
--- /dev/null
+++ b/absl/debugging/internal/utf8_for_code_point_test.cc
@@ -0,0 +1,175 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/utf8_for_code_point.h"
+
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+namespace {
+
+TEST(Utf8ForCodePointTest, RecognizesTheSmallestCodePoint) {
+ // U+0000 is encoded as a single NUL byte.
+ const Utf8ForCodePoint encoded(uint64_t{0});
+ ASSERT_EQ(encoded.length, 1);
+ EXPECT_EQ(encoded.bytes[0], '\0');
+}
+
+TEST(Utf8ForCodePointTest, RecognizesAsciiSmallA) {
+ // An ASCII letter is encoded as itself in one byte.
+ const Utf8ForCodePoint encoded(uint64_t{'a'});
+ ASSERT_EQ(encoded.length, 1);
+ EXPECT_EQ(encoded.bytes[0], 'a');
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheLargestOneByteCodePoint) {
+ // U+007F is the last code point that still fits in a single byte.
+ const Utf8ForCodePoint encoded(uint64_t{0x7f});
+ ASSERT_EQ(encoded.length, 1);
+ EXPECT_EQ(encoded.bytes[0], '\x7f');
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheSmallestTwoByteCodePoint) {
+ // U+0080 is the first code point that needs a two-byte encoding.
+ const Utf8ForCodePoint encoded(uint64_t{0x80});
+ ASSERT_EQ(encoded.length, 2);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xc2));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0x80));
+}
+
+TEST(Utf8ForCodePointTest, RecognizesSmallNWithTilde) {
+ // The expected bytes are taken from the string literal for U+00F1.
+ const char* expected = "ñ";
+ const Utf8ForCodePoint encoded(uint64_t{0xf1});
+ ASSERT_EQ(encoded.length, 2);
+ EXPECT_EQ(encoded.bytes[0], expected[0]);
+ EXPECT_EQ(encoded.bytes[1], expected[1]);
+}
+
+TEST(Utf8ForCodePointTest, RecognizesCapitalPi) {
+ // The expected bytes are taken from the string literal for U+03A0.
+ const char* expected = "Π";
+ const Utf8ForCodePoint encoded(uint64_t{0x3a0});
+ ASSERT_EQ(encoded.length, 2);
+ EXPECT_EQ(encoded.bytes[0], expected[0]);
+ EXPECT_EQ(encoded.bytes[1], expected[1]);
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheLargestTwoByteCodePoint) {
+ // U+07FF is the last code point that still fits in two bytes.
+ const Utf8ForCodePoint encoded(uint64_t{0x7ff});
+ ASSERT_EQ(encoded.length, 2);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xdf));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0xbf));
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheSmallestThreeByteCodePoint) {
+ // U+0800 is the first code point that needs a three-byte encoding.
+ const Utf8ForCodePoint encoded(uint64_t{0x800});
+ ASSERT_EQ(encoded.length, 3);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xe0));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0xa0));
+ EXPECT_EQ(encoded.bytes[2], static_cast<char>(0x80));
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheChineseCharacterZhong1AsInZhong1Wen2) {
+ // The expected bytes are taken from the string literal for U+4E2D.
+ const char* expected = "中";
+ const Utf8ForCodePoint encoded(uint64_t{0x4e2d});
+ ASSERT_EQ(encoded.length, 3);
+ EXPECT_EQ(encoded.bytes[0], expected[0]);
+ EXPECT_EQ(encoded.bytes[1], expected[1]);
+ EXPECT_EQ(encoded.bytes[2], expected[2]);
+}
+
+TEST(Utf8ForCodePointTest, RecognizesOneBeforeTheSmallestSurrogate) {
+ // U+D7FF sits immediately below the surrogate range and must be accepted.
+ const Utf8ForCodePoint encoded(uint64_t{0xd7ff});
+ ASSERT_EQ(encoded.length, 3);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xed));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0x9f));
+ EXPECT_EQ(encoded.bytes[2], static_cast<char>(0xbf));
+}
+
+TEST(Utf8ForCodePointTest, RejectsTheSmallestSurrogate) {
+ // 0xd800 is the first surrogate; rejection is reported as a zero length.
+ const Utf8ForCodePoint encoded(uint64_t{0xd800});
+ EXPECT_EQ(encoded.length, 0);
+}
+
+TEST(Utf8ForCodePointTest, RejectsTheLargestSurrogate) {
+ // 0xdfff is the last surrogate; rejection is reported as a zero length.
+ const Utf8ForCodePoint encoded(uint64_t{0xdfff});
+ EXPECT_EQ(encoded.length, 0);
+}
+
+TEST(Utf8ForCodePointTest, RecognizesOnePastTheLargestSurrogate) {
+ // U+E000 sits immediately above the surrogate range and must be accepted.
+ const Utf8ForCodePoint encoded(uint64_t{0xe000});
+ ASSERT_EQ(encoded.length, 3);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xee));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0x80));
+ EXPECT_EQ(encoded.bytes[2], static_cast<char>(0x80));
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheLargestThreeByteCodePoint) {
+ // U+FFFF is the last code point that still fits in three bytes.
+ const Utf8ForCodePoint encoded(uint64_t{0xffff});
+ ASSERT_EQ(encoded.length, 3);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xef));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0xbf));
+ EXPECT_EQ(encoded.bytes[2], static_cast<char>(0xbf));
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheSmallestFourByteCodePoint) {
+ // U+10000 is the first code point that needs a four-byte encoding.
+ const Utf8ForCodePoint encoded(uint64_t{0x10000});
+ ASSERT_EQ(encoded.length, 4);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xf0));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0x90));
+ EXPECT_EQ(encoded.bytes[2], static_cast<char>(0x80));
+ EXPECT_EQ(encoded.bytes[3], static_cast<char>(0x80));
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheJackOfHearts) {
+ // The expected bytes are taken from the string literal for U+1F0BB.
+ const char* expected = "🂻";
+ const Utf8ForCodePoint encoded(uint64_t{0x1f0bb});
+ ASSERT_EQ(encoded.length, 4);
+ EXPECT_EQ(encoded.bytes[0], expected[0]);
+ EXPECT_EQ(encoded.bytes[1], expected[1]);
+ EXPECT_EQ(encoded.bytes[2], expected[2]);
+ EXPECT_EQ(encoded.bytes[3], expected[3]);
+}
+
+TEST(Utf8ForCodePointTest, RecognizesTheLargestFourByteCodePoint) {
+ // U+10FFFF is the largest value that is still accepted as a code point.
+ const Utf8ForCodePoint encoded(uint64_t{0x10ffff});
+ ASSERT_EQ(encoded.length, 4);
+ EXPECT_EQ(encoded.bytes[0], static_cast<char>(0xf4));
+ EXPECT_EQ(encoded.bytes[1], static_cast<char>(0x8f));
+ EXPECT_EQ(encoded.bytes[2], static_cast<char>(0xbf));
+ EXPECT_EQ(encoded.bytes[3], static_cast<char>(0xbf));
+}
+
+TEST(Utf8ForCodePointTest, RejectsTheSmallestOverlargeCodePoint) {
+ // 0x110000 is one past the largest accepted value, so it must be rejected.
+ const Utf8ForCodePoint encoded(uint64_t{0x110000});
+ EXPECT_EQ(encoded.length, 0);
+}
+
+TEST(Utf8ForCodePointTest, RejectsAThoroughlyOverlargeCodePoint) {
+ // Only the upper 32 bits are set, so an implementation that truncated
+ // code_point to 32 bits would mistake this value for U+0000 and accept it.
+ Utf8ForCodePoint utf8(uint64_t{0xffffffff00000000});
+ EXPECT_EQ(utf8.length, 0);
+}
+
+TEST(Utf8ForCodePointTest, OkReturnsTrueForAValidCodePoint) {
+ // U+0000 is a valid code point, so ok() must report success.
+ const Utf8ForCodePoint encoded(uint64_t{0});
+ EXPECT_TRUE(encoded.ok());
+}
+
+TEST(Utf8ForCodePointTest, OkReturnsFalseForAnInvalidCodePoint) {
+ // A value far above 0x10ffff is invalid, so ok() must report failure.
+ const Utf8ForCodePoint encoded(uint64_t{0xffffffff00000000});
+ EXPECT_FALSE(encoded.ok());
+}
+
+} // namespace
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl