about summary refs log tree commit diff
path: root/absl/debugging/internal/demangle.cc
diff options
context:
space:
mode:
Diffstat (limited to 'absl/debugging/internal/demangle.cc')
-rw-r--r-- absl/debugging/internal/demangle.cc 1143
1 files changed, 1057 insertions, 86 deletions
diff --git a/absl/debugging/internal/demangle.cc b/absl/debugging/internal/demangle.cc
index 381a2b50..caac7636 100644
--- a/absl/debugging/internal/demangle.cc
+++ b/absl/debugging/internal/demangle.cc
@@ -14,18 +14,19 @@
// For reference check out:
// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
-//
-// Note that we only have partial C++11 support yet.
#include "absl/debugging/internal/demangle.h"
+#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
+#include <cstring>
#include <limits>
#include <string>
#include "absl/base/config.h"
+#include "absl/debugging/internal/demangle_rust.h"
#if ABSL_INTERNAL_HAS_CXA_DEMANGLE
#include <cxxabi.h>
@@ -44,14 +45,16 @@ typedef struct {
// List of operators from Itanium C++ ABI.
static const AbbrevPair kOperatorList[] = {
- // New has special syntax (not currently supported).
+ // New has special syntax.
{"nw", "new", 0},
{"na", "new[]", 0},
- // Works except that the 'gs' prefix is not supported.
+ // Special-cased elsewhere to support the optional gs prefix.
{"dl", "delete", 1},
{"da", "delete[]", 1},
+ {"aw", "co_await", 1},
+
{"ps", "+", 1}, // "positive"
{"ng", "-", 1}, // "negative"
{"ad", "&", 1}, // "address-of"
@@ -79,6 +82,7 @@ static const AbbrevPair kOperatorList[] = {
{"rs", ">>", 2},
{"lS", "<<=", 2},
{"rS", ">>=", 2},
+ {"ss", "<=>", 2},
{"eq", "==", 2},
{"ne", "!=", 2},
{"lt", "<", 2},
@@ -98,6 +102,7 @@ static const AbbrevPair kOperatorList[] = {
{"qu", "?", 3},
{"st", "sizeof", 0}, // Special syntax
{"sz", "sizeof", 1}, // Not a real operator name, but used in expressions.
+ {"sZ", "sizeof...", 0}, // Special syntax
{nullptr, nullptr, 0},
};
@@ -187,9 +192,50 @@ typedef struct {
int recursion_depth; // For stack exhaustion prevention.
int steps; // Cap how much work we'll do, regardless of depth.
ParseState parse_state; // Backtrackable state copied for most frames.
+
+ // Conditionally compiled support for marking the position of the first
+ // construct Demangle couldn't parse. This preprocessor symbol is intended
+ // for use by Abseil demangler maintainers only; its behavior is not part of
+ // Abseil's public interface.
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ int high_water_mark; // Input position where parsing failed.
+ bool too_complex; // True if any guard.IsTooComplex() call returned true.
+#endif
} State;
namespace {
+
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+void UpdateHighWaterMark(State *state) {
+ if (state->high_water_mark < state->parse_state.mangled_idx) {
+ state->high_water_mark = state->parse_state.mangled_idx;
+ }
+}
+
+void ReportHighWaterMark(State *state) {
+ // Write out the mangled name with the trouble point marked, provided that the
+ // output buffer is large enough and the mangled name did not hit a complexity
+ // limit (in which case the high water mark wouldn't point out an unparsable
+ // construct, only the point where a budget ran out).
+ const size_t input_length = std::strlen(state->mangled_begin);
+ if (input_length + 6 > static_cast<size_t>(state->out_end_idx) ||
+ state->too_complex) {
+ if (state->out_end_idx > 0) state->out[0] = '\0';
+ return;
+ }
+ const size_t high_water_mark = static_cast<size_t>(state->high_water_mark);
+ std::memcpy(state->out, state->mangled_begin, high_water_mark);
+ std::memcpy(state->out + high_water_mark, "--!--", 5);
+ std::memcpy(state->out + high_water_mark + 5,
+ state->mangled_begin + high_water_mark,
+ input_length - high_water_mark);
+ state->out[input_length + 5] = '\0';
+}
+#else
+void UpdateHighWaterMark(State *) {}
+void ReportHighWaterMark(State *) {}
+#endif
+
// Prevent deep recursion / stack exhaustion.
// Also prevent unbounded handling of complex inputs.
class ComplexityGuard {
@@ -201,7 +247,7 @@ class ComplexityGuard {
~ComplexityGuard() { --state_->recursion_depth; }
// 256 levels of recursion seems like a reasonable upper limit on depth.
- // 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
+ // 128 is not enough to demangle synthetic tests from demangle_unittest.txt:
// "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
static constexpr int kRecursionDepthLimit = 256;
@@ -222,8 +268,14 @@ class ComplexityGuard {
static constexpr int kParseStepsLimit = 1 << 17;
bool IsTooComplex() const {
- return state_->recursion_depth > kRecursionDepthLimit ||
- state_->steps > kParseStepsLimit;
+ if (state_->recursion_depth > kRecursionDepthLimit ||
+ state_->steps > kParseStepsLimit) {
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ state_->too_complex = true;
+#endif
+ return true;
+ }
+ return false;
}
private:
@@ -270,6 +322,10 @@ static void InitState(State* state,
state->out_end_idx = static_cast<int>(out_size);
state->recursion_depth = 0;
state->steps = 0;
+#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK
+ state->high_water_mark = 0;
+ state->too_complex = false;
+#endif
state->parse_state.mangled_idx = 0;
state->parse_state.out_cur_idx = 0;
@@ -291,13 +347,14 @@ static bool ParseOneCharToken(State *state, const char one_char_token) {
if (guard.IsTooComplex()) return false;
if (RemainingInput(state)[0] == one_char_token) {
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
return false;
}
-// Returns true and advances "mangled_cur" if we find "two_char_token"
-// at "mangled_cur" position. It is assumed that "two_char_token" does
+// Returns true and advances "mangled_idx" if we find "two_char_token"
+// at "mangled_idx" position. It is assumed that "two_char_token" does
// not contain '\0'.
static bool ParseTwoCharToken(State *state, const char *two_char_token) {
ComplexityGuard guard(state);
@@ -305,11 +362,45 @@ static bool ParseTwoCharToken(State *state, const char *two_char_token) {
if (RemainingInput(state)[0] == two_char_token[0] &&
RemainingInput(state)[1] == two_char_token[1]) {
state->parse_state.mangled_idx += 2;
+ UpdateHighWaterMark(state);
return true;
}
return false;
}
+// Returns true and advances "mangled_idx" if we find "three_char_token"
+// at "mangled_idx" position. It is assumed that "three_char_token" does
+// not contain '\0'.
+static bool ParseThreeCharToken(State *state, const char *three_char_token) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ if (RemainingInput(state)[0] == three_char_token[0] &&
+ RemainingInput(state)[1] == three_char_token[1] &&
+ RemainingInput(state)[2] == three_char_token[2]) {
+ state->parse_state.mangled_idx += 3;
+ UpdateHighWaterMark(state);
+ return true;
+ }
+ return false;
+}
+
+// Returns true and advances "mangled_idx" if we find a copy of the
+// NUL-terminated string "long_token" at "mangled_idx" position.
+static bool ParseLongToken(State *state, const char *long_token) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ int i = 0;
+ for (; long_token[i] != '\0'; ++i) {
+ // Note that we cannot run off the end of the NUL-terminated input here.
+ // Inside the loop body, long_token[i] is known to be different from NUL.
+ // So if we read the NUL on the end of the input here, we return at once.
+ if (RemainingInput(state)[i] != long_token[i]) return false;
+ }
+ state->parse_state.mangled_idx += i;
+ UpdateHighWaterMark(state);
+ return true;
+}
+
// Returns true and advances "mangled_idx" if we find any character in
// "char_class" at "mangled_idx" position.
static bool ParseCharClass(State *state, const char *char_class) {
@@ -322,6 +413,7 @@ static bool ParseCharClass(State *state, const char *char_class) {
for (; *p != '\0'; ++p) {
if (RemainingInput(state)[0] == *p) {
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
}
@@ -554,6 +646,7 @@ static bool ParseFloatNumber(State *state);
static bool ParseSeqId(State *state);
static bool ParseIdentifier(State *state, size_t length);
static bool ParseOperatorName(State *state, int *arity);
+static bool ParseConversionOperatorType(State *state);
static bool ParseSpecialName(State *state);
static bool ParseCallOffset(State *state);
static bool ParseNVOffset(State *state);
@@ -563,21 +656,33 @@ static bool ParseCtorDtorName(State *state);
static bool ParseDecltype(State *state);
static bool ParseType(State *state);
static bool ParseCVQualifiers(State *state);
+static bool ParseExtendedQualifier(State *state);
static bool ParseBuiltinType(State *state);
+static bool ParseVendorExtendedType(State *state);
static bool ParseFunctionType(State *state);
static bool ParseBareFunctionType(State *state);
+static bool ParseOverloadAttribute(State *state);
static bool ParseClassEnumType(State *state);
static bool ParseArrayType(State *state);
static bool ParsePointerToMemberType(State *state);
static bool ParseTemplateParam(State *state);
+static bool ParseTemplateParamDecl(State *state);
static bool ParseTemplateTemplateParam(State *state);
static bool ParseTemplateArgs(State *state);
static bool ParseTemplateArg(State *state);
static bool ParseBaseUnresolvedName(State *state);
static bool ParseUnresolvedName(State *state);
+static bool ParseUnresolvedQualifierLevel(State *state);
+static bool ParseUnionSelector(State* state);
+static bool ParseFunctionParam(State* state);
+static bool ParseBracedExpression(State *state);
static bool ParseExpression(State *state);
+static bool ParseInitializer(State *state);
static bool ParseExprPrimary(State *state);
-static bool ParseExprCastValue(State *state);
+static bool ParseExprCastValueAndTrailingE(State *state);
+static bool ParseQRequiresClauseExpr(State *state);
+static bool ParseRequirement(State *state);
+static bool ParseTypeConstraint(State *state);
static bool ParseLocalName(State *state);
static bool ParseLocalNameSuffix(State *state);
static bool ParseDiscriminator(State *state);
@@ -622,22 +727,34 @@ static bool ParseMangledName(State *state) {
}
// <encoding> ::= <(function) name> <bare-function-type>
+// [`Q` <requires-clause expr>]
// ::= <(data) name>
// ::= <special-name>
+//
+// NOTE: Based on http://shortn/_Hoq9qG83rx
static bool ParseEncoding(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- // Implementing the first two productions together as <name>
- // [<bare-function-type>] avoids exponential blowup of backtracking.
+ // Since the first two productions both start with <name>, attempt
+ // to parse it only once to avoid exponential blowup of backtracking.
//
- // Since Optional(...) can't fail, there's no need to copy the state for
- // backtracking.
- if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
+ // We're careful about exponential blowup because <encoding> recursively
+ // appears in other productions downstream of its first two productions,
+ // which means that every call to `ParseName` would possibly indirectly
+ // result in two calls to `ParseName` etc.
+ if (ParseName(state)) {
+ if (!ParseBareFunctionType(state)) {
+ return true; // <(data) name>
+ }
+
+ // Parsed: <(function) name> <bare-function-type>
+ // Pending: [`Q` <requires-clause expr>]
+ ParseQRequiresClauseExpr(state); // restores state on failure
return true;
}
if (ParseSpecialName(state)) {
- return true;
+ return true; // <special-name>
}
return false;
}
@@ -723,19 +840,26 @@ static bool ParseNestedName(State *state) {
// <prefix> ::= <prefix> <unqualified-name>
// ::= <template-prefix> <template-args>
// ::= <template-param>
+// ::= <decltype>
// ::= <substitution>
// ::= # empty
// <template-prefix> ::= <prefix> <(template) unqualified-name>
// ::= <template-param>
// ::= <substitution>
+// ::= <vendor-extended-type>
static bool ParsePrefix(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
bool has_something = false;
while (true) {
MaybeAppendSeparator(state);
- if (ParseTemplateParam(state) ||
+ if (ParseTemplateParam(state) || ParseDecltype(state) ||
ParseSubstitution(state, /*accept_std=*/true) ||
+ // Although the official grammar does not mention it, nested-names
+ // shaped like Nu14__some_builtinIiE6memberE occur in practice, and it
+ // is not clear what else a compiler is supposed to do when a
+ // vendor-extended type has named members.
+ ParseVendorExtendedType(state) ||
ParseUnscopedName(state) ||
(ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
has_something = true;
@@ -757,8 +881,14 @@ static bool ParsePrefix(State *state) {
// ::= <source-name> [<abi-tags>]
// ::= <local-source-name> [<abi-tags>]
// ::= <unnamed-type-name> [<abi-tags>]
+// ::= DC <source-name>+ E # C++17 structured binding
+// ::= F <source-name> # C++20 constrained friend
+// ::= F <operator-name> # C++20 constrained friend
//
// <local-source-name> is a GCC extension; see below.
+//
+// For the F notation for constrained friends, see
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/24#issuecomment-1491130332.
static bool ParseUnqualifiedName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -767,6 +897,23 @@ static bool ParseUnqualifiedName(State *state) {
ParseUnnamedTypeName(state)) {
return ParseAbiTags(state);
}
+
+ // DC <source-name>+ E
+ ParseState copy = state->parse_state;
+ if (ParseTwoCharToken(state, "DC") && OneOrMore(ParseSourceName, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // F <source-name>
+ // F <operator-name>
+ if (ParseOneCharToken(state, 'F') && MaybeAppend(state, "friend ") &&
+ (ParseSourceName(state) || ParseOperatorName(state, nullptr))) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -824,7 +971,11 @@ static bool ParseLocalSourceName(State *state) {
// <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
// ::= <closure-type-name>
// <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
-// <lambda-sig> ::= <(parameter) type>+
+// <lambda-sig> ::= <template-param-decl>* <(parameter) type>+
+//
+// For <template-param-decl>* in <lambda-sig> see:
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/31
static bool ParseUnnamedTypeName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -847,6 +998,7 @@ static bool ParseUnnamedTypeName(State *state) {
// Closure type.
which = -1;
if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
+ ZeroOrMore(ParseTemplateParamDecl, state) &&
OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
@@ -888,6 +1040,7 @@ static bool ParseNumber(State *state, int *number_out) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
if (number_out != nullptr) {
// Note: possibly truncate "number".
*number_out = static_cast<int>(number);
@@ -910,6 +1063,7 @@ static bool ParseFloatNumber(State *state) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
return true;
}
return false;
@@ -928,6 +1082,7 @@ static bool ParseSeqId(State *state) {
}
if (p != RemainingInput(state)) { // Conversion succeeded.
state->parse_state.mangled_idx += p - RemainingInput(state);
+ UpdateHighWaterMark(state);
return true;
}
return false;
@@ -946,11 +1101,13 @@ static bool ParseIdentifier(State *state, size_t length) {
MaybeAppendWithLength(state, RemainingInput(state), length);
}
state->parse_state.mangled_idx += length;
+ UpdateHighWaterMark(state);
return true;
}
// <operator-name> ::= nw, and other two letters cases
// ::= cv <type> # (cast)
+// ::= li <source-name> # C++11 user-defined literal
// ::= v <digit> <source-name> # vendor extended operator
static bool ParseOperatorName(State *state, int *arity) {
ComplexityGuard guard(state);
@@ -961,7 +1118,7 @@ static bool ParseOperatorName(State *state, int *arity) {
// First check with "cv" (cast) case.
ParseState copy = state->parse_state;
if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
- EnterNestedName(state) && ParseType(state) &&
+ EnterNestedName(state) && ParseConversionOperatorType(state) &&
LeaveNestedName(state, copy.nest_level)) {
if (arity != nullptr) {
*arity = 1;
@@ -970,6 +1127,13 @@ static bool ParseOperatorName(State *state, int *arity) {
}
state->parse_state = copy;
+ // Then user-defined literals.
+ if (ParseTwoCharToken(state, "li") && MaybeAppend(state, "operator\"\" ") &&
+ ParseSourceName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// Then vendor extended operators.
if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
ParseSourceName(state)) {
@@ -997,36 +1161,120 @@ static bool ParseOperatorName(State *state, int *arity) {
}
MaybeAppend(state, p->real_name);
state->parse_state.mangled_idx += 2;
+ UpdateHighWaterMark(state);
return true;
}
}
return false;
}
+// <operator-name> ::= cv <type> # (cast)
+//
+// The name of a conversion operator is the one place where cv-qualifiers, *, &,
+// and other simple type combinators are expected to appear in our stripped-down
+// demangling (elsewhere they appear in function signatures or template
+// arguments, which we omit from the output). We make reasonable efforts to
+// render simple cases accurately.
+static bool ParseConversionOperatorType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ // Scan pointers, const, and other easy mangling prefixes with postfix
+ // demanglings. Remember the range of input for later rescanning.
+ //
+ // See `ParseType` and the `switch` below for the meaning of each char.
+ const char* begin_simple_prefixes = RemainingInput(state);
+ while (ParseCharClass(state, "OPRCGrVK")) {}
+ const char* end_simple_prefixes = RemainingInput(state);
+
+ // Emit the base type first.
+ if (!ParseType(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+
+ // Then rescan the easy type combinators in reverse order to emit their
+ // demanglings in the expected output order.
+ while (begin_simple_prefixes != end_simple_prefixes) {
+ switch (*--end_simple_prefixes) {
+ case 'P':
+ MaybeAppend(state, "*");
+ break;
+ case 'R':
+ MaybeAppend(state, "&");
+ break;
+ case 'O':
+ MaybeAppend(state, "&&");
+ break;
+ case 'C':
+ MaybeAppend(state, " _Complex");
+ break;
+ case 'G':
+ MaybeAppend(state, " _Imaginary");
+ break;
+ case 'r':
+ MaybeAppend(state, " restrict");
+ break;
+ case 'V':
+ MaybeAppend(state, " volatile");
+ break;
+ case 'K':
+ MaybeAppend(state, " const");
+ break;
+ }
+ }
+ return true;
+}
+
// <special-name> ::= TV <type>
// ::= TT <type>
// ::= TI <type>
// ::= TS <type>
-// ::= TH <type> # thread-local
+// ::= TW <name> # thread-local wrapper
+// ::= TH <name> # thread-local initialization
// ::= Tc <call-offset> <call-offset> <(base) encoding>
// ::= GV <(object) name>
+// ::= GR <(object) name> [<seq-id>] _
// ::= T <call-offset> <(base) encoding>
+// ::= GTt <encoding> # transaction-safe entry point
+// ::= TA <template-arg> # nontype template parameter object
// G++ extensions:
// ::= TC <type> <(offset) number> _ <(base) type>
// ::= TF <type>
// ::= TJ <type>
-// ::= GR <name>
+// ::= GR <name> # without final _, perhaps an earlier form?
// ::= GA <encoding>
// ::= Th <call-offset> <(base) encoding>
// ::= Tv <call-offset> <(base) encoding>
//
-// Note: we don't care much about them since they don't appear in
-// stack traces. The are special data.
+// Note: Most of these are special data, not functions that occur in stack
+// traces. Exceptions are TW and TH, which denote functions supporting the
+// thread_local feature. For these see:
+//
+// https://maskray.me/blog/2021-02-14-all-about-thread-local-storage
+//
+// For TA see https://github.com/itanium-cxx-abi/cxx-abi/issues/63.
static bool ParseSpecialName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") &&
+
+ if (ParseTwoCharToken(state, "TW")) {
+ MaybeAppend(state, "thread-local wrapper routine for ");
+ if (ParseName(state)) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ if (ParseTwoCharToken(state, "TH")) {
+ MaybeAppend(state, "thread-local initialization routine for ");
+ if (ParseName(state)) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTIS") &&
ParseType(state)) {
return true;
}
@@ -1064,21 +1312,51 @@ static bool ParseSpecialName(State *state) {
}
state->parse_state = copy;
- if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
+ // <special-name> ::= GR <(object) name> [<seq-id>] _ # modern standard
+ // ::= GR <(object) name> # also recognized
+ if (ParseTwoCharToken(state, "GR")) {
+ MaybeAppend(state, "reference temporary for ");
+ if (!ParseName(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ const bool has_seq_id = ParseSeqId(state);
+ const bool has_underscore = ParseOneCharToken(state, '_');
+ if (has_seq_id && !has_underscore) {
+ state->parse_state = copy;
+ return false;
+ }
return true;
}
- state->parse_state = copy;
if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
return true;
}
state->parse_state = copy;
+ if (ParseThreeCharToken(state, "GTt") &&
+ MaybeAppend(state, "transaction clone for ") && ParseEncoding(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
ParseCallOffset(state) && ParseEncoding(state)) {
return true;
}
state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "TA")) {
+ bool append = state->parse_state.append;
+ DisableAppend(state);
+ if (ParseTemplateArg(state)) {
+ RestoreAppend(state, append);
+ MaybeAppend(state, "template parameter object");
+ return true;
+ }
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1182,7 +1460,6 @@ static bool ParseDecltype(State *state) {
// ::= O <type> # rvalue reference-to (C++0x)
// ::= C <type> # complex pair (C 2000)
// ::= G <type> # imaginary (C 2000)
-// ::= U <source-name> <type> # vendor extended type qualifier
// ::= <builtin-type>
// ::= <function-type>
// ::= <class-enum-type> # note: just an alias for <name>
@@ -1193,7 +1470,9 @@ static bool ParseDecltype(State *state) {
// ::= <decltype>
// ::= <substitution>
// ::= Dp <type> # pack expansion of (C++0x)
-// ::= Dv <num-elems> _ # GNU vector extension
+// ::= Dv <(elements) number> _ <type> # GNU vector extension
+// ::= Dv <(bytes) expression> _ <type>
+// ::= Dk <type-constraint> # constrained auto
//
static bool ParseType(State *state) {
ComplexityGuard guard(state);
@@ -1236,12 +1515,6 @@ static bool ParseType(State *state) {
}
state->parse_state = copy;
- if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
- ParseType(state)) {
- return true;
- }
- state->parse_state = copy;
-
if (ParseBuiltinType(state) || ParseFunctionType(state) ||
ParseClassEnumType(state) || ParseArrayType(state) ||
ParsePointerToMemberType(state) || ParseDecltype(state) ||
@@ -1260,54 +1533,160 @@ static bool ParseType(State *state) {
return true;
}
+ // GNU vector extension Dv <number> _ <type>
if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
- ParseOneCharToken(state, '_')) {
+ ParseOneCharToken(state, '_') && ParseType(state)) {
return true;
}
state->parse_state = copy;
- return false;
+ // GNU vector extension Dv <expression> _ <type>
+ if (ParseTwoCharToken(state, "Dv") && ParseExpression(state) &&
+ ParseOneCharToken(state, '_') && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Dk") && ParseTypeConstraint(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // For this notation see CXXNameMangler::mangleType in Clang's source code.
+ // The relevant logic and its comment "not clear how to mangle this!" date
+ // from 2011, so it may be with us awhile.
+ return ParseLongToken(state, "_SUBSTPACK_");
}
+// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
// <CV-qualifiers> ::= [r] [V] [K]
+//
// We don't allow empty <CV-qualifiers> to avoid infinite loop in
// ParseType().
static bool ParseCVQualifiers(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
int num_cv_qualifiers = 0;
+ while (ParseExtendedQualifier(state)) ++num_cv_qualifiers;
num_cv_qualifiers += ParseOneCharToken(state, 'r');
num_cv_qualifiers += ParseOneCharToken(state, 'V');
num_cv_qualifiers += ParseOneCharToken(state, 'K');
return num_cv_qualifiers > 0;
}
+// <extended-qualifier> ::= U <source-name> [<template-args>]
+static bool ParseExtendedQualifier(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+
+ if (!ParseOneCharToken(state, 'U')) return false;
+
+ bool append = state->parse_state.append;
+ DisableAppend(state);
+ if (!ParseSourceName(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ Optional(ParseTemplateArgs(state));
+ RestoreAppend(state, append);
+ return true;
+}
+
// <builtin-type> ::= v, etc. # single-character builtin types
-// ::= u <source-name>
+// ::= <vendor-extended-type>
// ::= Dd, etc. # two-character builtin types
+// ::= DB (<number> | <expression>) _ # _BitInt(N)
+// ::= DU (<number> | <expression>) _ # unsigned _BitInt(N)
+// ::= DF <number> _ # _FloatN (N bits)
+// ::= DF <number> x # _FloatNx
+// ::= DF16b # std::bfloat16_t
//
// Not supported:
-// ::= DF <number> _ # _FloatN (N bits)
-//
+// ::= [DS] DA <fixed-point-size>
+// ::= [DS] DR <fixed-point-size>
+// because real implementations of N1169 fixed-point are scant.
static bool ParseBuiltinType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- const AbbrevPair *p;
- for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
+ ParseState copy = state->parse_state;
+
+ // DB (<number> | <expression>) _ # _BitInt(N)
+ // DU (<number> | <expression>) _ # unsigned _BitInt(N)
+ if (ParseTwoCharToken(state, "DB") ||
+ (ParseTwoCharToken(state, "DU") && MaybeAppend(state, "unsigned "))) {
+ bool append = state->parse_state.append;
+ DisableAppend(state);
+ int number = -1;
+ if (!ParseNumber(state, &number) && !ParseExpression(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ RestoreAppend(state, append);
+
+ if (!ParseOneCharToken(state, '_')) {
+ state->parse_state = copy;
+ return false;
+ }
+
+ MaybeAppend(state, "_BitInt(");
+ if (number >= 0) {
+ MaybeAppendDecimal(state, number);
+ } else {
+ MaybeAppend(state, "?"); // the best we can do for dependent sizes
+ }
+ MaybeAppend(state, ")");
+ return true;
+ }
+
+ // DF <number> _ # _FloatN
+ // DF <number> x # _FloatNx
+ // DF16b # std::bfloat16_t
+ if (ParseTwoCharToken(state, "DF")) {
+ if (ParseThreeCharToken(state, "16b")) {
+ MaybeAppend(state, "std::bfloat16_t");
+ return true;
+ }
+ int number = 0;
+ if (!ParseNumber(state, &number)) {
+ state->parse_state = copy;
+ return false;
+ }
+ MaybeAppend(state, "_Float");
+ MaybeAppendDecimal(state, number);
+ if (ParseOneCharToken(state, 'x')) {
+ MaybeAppend(state, "x");
+ return true;
+ }
+ if (ParseOneCharToken(state, '_')) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ for (const AbbrevPair *p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
// Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
if (p->abbrev[1] == '\0') {
if (ParseOneCharToken(state, p->abbrev[0])) {
MaybeAppend(state, p->real_name);
- return true;
+ return true; // ::= v, etc. # single-character builtin types
}
} else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
MaybeAppend(state, p->real_name);
- return true;
+ return true; // ::= Dd, etc. # two-character builtin types
}
}
+ return ParseVendorExtendedType(state);
+}
+
+// <vendor-extended-type> ::= u <source-name> [<template-args>]
+static bool ParseVendorExtendedType(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
+ if (ParseOneCharToken(state, 'u') && ParseSourceName(state) &&
+ Optional(ParseTemplateArgs(state))) {
return true;
}
state->parse_state = copy;
@@ -1342,28 +1721,44 @@ static bool ParseExceptionSpec(State *state) {
return false;
}
-// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E
+// <function-type> ::=
+// [exception-spec] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E
+//
+// <ref-qualifier> ::= R | O
static bool ParseFunctionType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') &&
- Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
- Optional(ParseOneCharToken(state, 'O')) &&
- ParseOneCharToken(state, 'E')) {
- return true;
+ Optional(ParseExceptionSpec(state));
+ Optional(ParseTwoCharToken(state, "Dx"));
+ if (!ParseOneCharToken(state, 'F')) {
+ state->parse_state = copy;
+ return false;
}
- state->parse_state = copy;
- return false;
+ Optional(ParseOneCharToken(state, 'Y'));
+ if (!ParseBareFunctionType(state)) {
+ state->parse_state = copy;
+ return false;
+ }
+ Optional(ParseCharClass(state, "RO"));
+ if (!ParseOneCharToken(state, 'E')) {
+ state->parse_state = copy;
+ return false;
+ }
+ return true;
}
-// <bare-function-type> ::= <(signature) type>+
+// <bare-function-type> ::= <overload-attribute>* <(signature) type>+
+//
+// The <overload-attribute>* prefix is nonstandard; see the comment on
+// ParseOverloadAttribute.
static bool ParseBareFunctionType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
DisableAppend(state);
- if (OneOrMore(ParseType, state)) {
+ if (ZeroOrMore(ParseOverloadAttribute, state) &&
+ OneOrMore(ParseType, state)) {
RestoreAppend(state, copy.append);
MaybeAppend(state, "()");
return true;
@@ -1372,11 +1767,43 @@ static bool ParseBareFunctionType(State *state) {
return false;
}
+// <overload-attribute> ::= Ua <name>
+//
+// The nonstandard <overload-attribute> production is sufficient to accept the
+// current implementation of __attribute__((enable_if(condition, "message")))
+// and future attributes of a similar shape. See
+// https://clang.llvm.org/docs/AttributeReference.html#enable-if and the
+// definition of CXXNameMangler::mangleFunctionEncodingBareType in Clang's
+// source code.
+static bool ParseOverloadAttribute(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;
+ if (ParseTwoCharToken(state, "Ua") && ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
+}
+
// <class-enum-type> ::= <name>
+// ::= Ts <name> # struct Name or class Name
+// ::= Tu <name> # union Name
+// ::= Te <name> # enum Name
+//
+// See http://shortn/_W3YrltiEd0.
static bool ParseClassEnumType(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
- return ParseName(state);
+ ParseState copy = state->parse_state;
+ if (Optional(ParseTwoCharToken(state, "Ts") ||
+ ParseTwoCharToken(state, "Tu") ||
+ ParseTwoCharToken(state, "Te")) &&
+ ParseName(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+ return false;
}
// <array-type> ::= A <(positive dimension) number> _ <(element) type>
@@ -1413,21 +1840,83 @@ static bool ParsePointerToMemberType(State *state) {
// <template-param> ::= T_
// ::= T <parameter-2 non-negative number> _
+// ::= TL <level-1> __
+// ::= TL <level-1> _ <parameter-2 non-negative number> _
static bool ParseTemplateParam(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
if (ParseTwoCharToken(state, "T_")) {
MaybeAppend(state, "?"); // We don't support template substitutions.
- return true;
+ return true; // ::= T_
}
ParseState copy = state->parse_state;
if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
ParseOneCharToken(state, '_')) {
MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= T <parameter-2 non-negative number> _
+ }
+ state->parse_state = copy;  // Not a T <number> _ form; rewind, then try TL.
+
+ if (ParseTwoCharToken(state, "TL") && ParseNumber(state, nullptr)) {  // TL <level-1>
+ if (ParseTwoCharToken(state, "__")) {
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= TL <level-1> __
+ }
+
+ if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_')) {
+ MaybeAppend(state, "?"); // We don't support template substitutions.
+ return true; // ::= TL <level-1> _ <parameter-2 non-negative number> _
+ }
+ }
+ state->parse_state = copy;  // Neither TL form matched; rewind.
+ return false;
+}
+
+// <template-param-decl>
+// ::= Ty # template type parameter
+// ::= Tk <concept name> [<template-args>] # constrained type parameter
+// ::= Tn <type> # template non-type parameter
+// ::= Tt <template-param-decl>* E # template template parameter
+// ::= Tp <template-param-decl> # template parameter pack
+//
+// NOTE: <concept name> is just a <name>: http://shortn/_MqJVyr0fc1
+// TODO(b/324066279): Implement optional suffix for `Tt`:
+// [Q <requires-clause expr>]
+static bool ParseTemplateParamDecl(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // Snapshot restored after each failed alternative.
+
+ if (ParseTwoCharToken(state, "Ty")) {
+ return true;  // ::= Ty
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tk") && ParseName(state) &&
+ Optional(ParseTemplateArgs(state))) {
+ return true;  // ::= Tk <concept name> [<template-args>]
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tn") && ParseType(state)) {  // ::= Tn <type>
return true;
}
state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tt") &&
+ ZeroOrMore(ParseTemplateParamDecl, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;  // ::= Tt <template-param-decl>* E
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "Tp") && ParseTemplateParamDecl(state)) {
+ return true;  // ::= Tp <template-param-decl>
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1441,13 +1930,14 @@ static bool ParseTemplateTemplateParam(State *state) {
ParseSubstitution(state, /*accept_std=*/false));
}
-// <template-args> ::= I <template-arg>+ E
+// <template-args> ::= I <template-arg>+ [Q <requires-clause expr>] E
static bool ParseTemplateArgs(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
DisableAppend(state);
if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
+ Optional(ParseQRequiresClauseExpr(state)) &&
ParseOneCharToken(state, 'E')) {
RestoreAppend(state, copy.append);
MaybeAppend(state, "<>");
@@ -1457,7 +1947,8 @@ static bool ParseTemplateArgs(State *state) {
return false;
}
-// <template-arg> ::= <type>
+// <template-arg> ::= <template-param-decl> <template-arg>
+// ::= <type>
// ::= <expr-primary>
// ::= J <template-arg>* E # argument pack
// ::= X <expression> E
@@ -1541,7 +2032,7 @@ static bool ParseTemplateArg(State *state) {
// ::= L <source-name> [<template-args>] [<expr-cast-value> E]
if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
copy = state->parse_state;
- if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
+ if (ParseExprCastValueAndTrailingE(state)) {
return true;
}
state->parse_state = copy;
@@ -1560,6 +2051,12 @@ static bool ParseTemplateArg(State *state) {
return true;
}
state->parse_state = copy;
+
+ if (ParseTemplateParamDecl(state) && ParseTemplateArg(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
@@ -1614,6 +2111,13 @@ static bool ParseBaseUnresolvedName(State *state) {
// <base-unresolved-name>
// ::= [gs] sr <unresolved-qualifier-level>+ E
// <base-unresolved-name>
+// ::= sr St <simple-id> <simple-id> # nonstandard
+//
+// The last case is not part of the official grammar but has been observed in
+// real-world examples that the GNU demangler (but not the LLVM demangler) is
+// able to decode; see demangle_test.cc for one such symbol name. The shape
+// sr St <simple-id> <simple-id> was inferred by closed-box testing of the GNU
+// demangler.
static bool ParseUnresolvedName(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -1633,7 +2137,7 @@ static bool ParseUnresolvedName(State *state) {
if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
ParseUnresolvedType(state) &&
- OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ OneOrMore(ParseUnresolvedQualifierLevel, state) &&
ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
return true;
}
@@ -1641,35 +2145,160 @@ static bool ParseUnresolvedName(State *state) {
if (Optional(ParseTwoCharToken(state, "gs")) &&
ParseTwoCharToken(state, "sr") &&
- OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
+ OneOrMore(ParseUnresolvedQualifierLevel, state) &&
ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
return true;
}
state->parse_state = copy;
+ if (ParseTwoCharToken(state, "sr") && ParseTwoCharToken(state, "St") &&
+ ParseSimpleId(state) && ParseSimpleId(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
return false;
}
+// <unresolved-qualifier-level> ::= <simple-id>
+// ::= <substitution> <template-args>
+//
+// The production <substitution> <template-args> is nonstandard but is observed
+// in practice. An upstream discussion on the best shape of <unresolved-name>
+// has not converged:
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/38
+static bool ParseUnresolvedQualifierLevel(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ if (ParseSimpleId(state)) return true;  // ::= <simple-id>
+
+ ParseState copy = state->parse_state;  // Snapshot taken for backtracking.
+ if (ParseSubstitution(state, /*accept_std=*/false) &&
+ ParseTemplateArgs(state)) {
+ return true;  // ::= <substitution> <template-args> (nonstandard)
+ }
+ state->parse_state = copy;  // Rewind a substitution not followed by args.
+ return false;
+}
+
+// <union-selector> ::= _ [<number>]
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+static bool ParseUnionSelector(State *state) {
+ return ParseOneCharToken(state, '_') && Optional(ParseNumber(state, nullptr));  // '_' is required; the number after it may be absent.
+}
+
+// <function-param> ::= fp <(top-level) CV-qualifiers> _
+// ::= fp <(top-level) CV-qualifiers> <number> _
+// ::= fL <number> p <(top-level) CV-qualifiers> _
+// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
+// ::= fpT # this
+static bool ParseFunctionParam(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;  // Snapshot restored after each failed alternative.
+
+ // Function-param expression (level 0): fp <CV-qualifiers> [<number>] _
+ if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Function-param expression (level 1+). Note that the level number after fL
+ // is parsed as optional here, which is more lenient than the grammar above.
+ if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
+ ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
+ Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ return ParseThreeCharToken(state, "fpT");  // ::= fpT ("this"), tried last after rewinding.
+}
+
+// <braced-expression> ::= <expression>
+// ::= di <field source-name> <braced-expression>
+// ::= dx <index expression> <braced-expression>
+// ::= dX <expression> <expression> <braced-expression>
+static bool ParseBracedExpression(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;  // Snapshot restored after each failed alternative.
+
+ if (ParseTwoCharToken(state, "di") && ParseSourceName(state) &&
+ ParseBracedExpression(state)) {
+ return true;  // ::= di <field source-name> <braced-expression>
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "dx") && ParseExpression(state) &&
+ ParseBracedExpression(state)) {
+ return true;  // ::= dx <index expression> <braced-expression>
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "dX") &&
+ ParseExpression(state) && ParseExpression(state) &&
+ ParseBracedExpression(state)) {
+ return true;  // ::= dX <expression> <expression> <braced-expression>
+ }
+ state->parse_state = copy;
+
+ return ParseExpression(state);  // ::= <expression>, the fallback when no designator form matched.
+}
+
// <expression> ::= <1-ary operator-name> <expression>
// ::= <2-ary operator-name> <expression> <expression>
// ::= <3-ary operator-name> <expression> <expression> <expression>
+// ::= pp_ <expression> # ++e; pp <expression> is e++
+// ::= mm_ <expression> # --e; mm <expression> is e--
// ::= cl <expression>+ E
// ::= cp <simple-id> <expression>* E # Clang-specific.
+// ::= so <type> <expression> [<number>] <union-selector>* [p] E
// ::= cv <type> <expression> # type (expression)
// ::= cv <type> _ <expression>* E # type (expr-list)
+// ::= tl <type> <braced-expression>* E
+// ::= il <braced-expression>* E
+// ::= [gs] nw <expression>* _ <type> E
+// ::= [gs] nw <expression>* _ <type> <initializer>
+// ::= [gs] na <expression>* _ <type> E
+// ::= [gs] na <expression>* _ <type> <initializer>
+// ::= [gs] dl <expression>
+// ::= [gs] da <expression>
+// ::= dc <type> <expression>
+// ::= sc <type> <expression>
+// ::= cc <type> <expression>
+// ::= rc <type> <expression>
+// ::= ti <type>
+// ::= te <expression>
// ::= st <type>
+// ::= at <type>
+// ::= az <expression>
+// ::= nx <expression>
// ::= <template-param>
// ::= <function-param>
+// ::= sZ <template-param>
+// ::= sZ <function-param>
+// ::= sP <template-arg>* E
// ::= <expr-primary>
// ::= dt <expression> <unresolved-name> # expr.name
// ::= pt <expression> <unresolved-name> # expr->name
// ::= sp <expression> # argument pack expansion
+// ::= fl <binary operator-name> <expression>
+// ::= fr <binary operator-name> <expression>
+// ::= fL <binary operator-name> <expression> <expression>
+// ::= fR <binary operator-name> <expression> <expression>
+// ::= tw <expression>
+// ::= tr
// ::= sr <type> <unqualified-name> <template-args>
// ::= sr <type> <unqualified-name>
-// <function-param> ::= fp <(top-level) CV-qualifiers> _
-// ::= fp <(top-level) CV-qualifiers> <number> _
-// ::= fL <number> p <(top-level) CV-qualifiers> _
-// ::= fL <number> p <(top-level) CV-qualifiers> <number> _
+// ::= u <source-name> <template-arg>* E # vendor extension
+// ::= rq <requirement>+ E
+// ::= rQ <bare-function-type> _ <requirement>+ E
static bool ParseExpression(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
@@ -1686,6 +2315,15 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // Preincrement and predecrement. Postincrement and postdecrement are handled
+ // by the operator-name logic later on.
+ if ((ParseThreeCharToken(state, "pp_") ||
+ ParseThreeCharToken(state, "mm_")) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// Clang-specific "cp <simple-id> <expression>* E"
// https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338
if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) &&
@@ -1694,17 +2332,65 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
- // Function-param expression (level 0).
- if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
- Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ // <expression> ::= so <type> <expression> [<number>] <union-selector>* [p] E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/47
+ if (ParseTwoCharToken(state, "so") && ParseType(state) &&
+ ParseExpression(state) && Optional(ParseNumber(state, nullptr)) &&
+ ZeroOrMore(ParseUnionSelector, state) &&
+ Optional(ParseOneCharToken(state, 'p')) &&
+ ParseOneCharToken(state, 'E')) {
return true;
}
state->parse_state = copy;
- // Function-param expression (level 1+).
- if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
- ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
- Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
+ // <expression> ::= <function-param>
+ if (ParseFunctionParam(state)) return true;
+ state->parse_state = copy;
+
+ // <expression> ::= tl <type> <braced-expression>* E
+ if (ParseTwoCharToken(state, "tl") && ParseType(state) &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= il <braced-expression>* E
+ if (ParseTwoCharToken(state, "il") &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= [gs] nw <expression>* _ <type> E
+ // ::= [gs] nw <expression>* _ <type> <initializer>
+ // ::= [gs] na <expression>* _ <type> E
+ // ::= [gs] na <expression>* _ <type> <initializer>
+ if (Optional(ParseTwoCharToken(state, "gs")) &&
+ (ParseTwoCharToken(state, "nw") || ParseTwoCharToken(state, "na")) &&
+ ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, '_') &&
+ ParseType(state) &&
+ (ParseOneCharToken(state, 'E') || ParseInitializer(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= [gs] dl <expression>
+ // ::= [gs] da <expression>
+ if (Optional(ParseTwoCharToken(state, "gs")) &&
+ (ParseTwoCharToken(state, "dl") || ParseTwoCharToken(state, "da")) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // dynamic_cast, static_cast, const_cast, reinterpret_cast.
+ //
+ // <expression> ::= (dc | sc | cc | rc) <type> <expression>
+ if (ParseCharClass(state, "dscr") && ParseOneCharToken(state, 'c') &&
+ ParseType(state) && ParseExpression(state)) {
return true;
}
state->parse_state = copy;
@@ -1746,15 +2432,96 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // typeid(type)
+ if (ParseTwoCharToken(state, "ti") && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // typeid(expression)
+ if (ParseTwoCharToken(state, "te") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
// sizeof type
if (ParseTwoCharToken(state, "st") && ParseType(state)) {
return true;
}
state->parse_state = copy;
+ // alignof(type)
+ if (ParseTwoCharToken(state, "at") && ParseType(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // alignof(expression), a GNU extension
+ if (ParseTwoCharToken(state, "az") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // noexcept(expression) appearing as an expression in a dependent signature
+ if (ParseTwoCharToken(state, "nx") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // sizeof...(pack)
+ //
+ // <expression> ::= sZ <template-param>
+ // ::= sZ <function-param>
+ if (ParseTwoCharToken(state, "sZ") &&
+ (ParseFunctionParam(state) || ParseTemplateParam(state))) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // sizeof...(pack) captured from an alias template
+ //
+ // <expression> ::= sP <template-arg>* E
+ if (ParseTwoCharToken(state, "sP") && ZeroOrMore(ParseTemplateArg, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Unary folds (... op pack) and (pack op ...).
+ //
+ // <expression> ::= fl <binary operator-name> <expression>
+ // ::= fr <binary operator-name> <expression>
+ if ((ParseTwoCharToken(state, "fl") || ParseTwoCharToken(state, "fr")) &&
+ ParseOperatorName(state, nullptr) && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // Binary folds (init op ... op pack) and (pack op ... op init).
+ //
+ // <expression> ::= fL <binary operator-name> <expression> <expression>
+ // ::= fR <binary operator-name> <expression> <expression>
+ if ((ParseTwoCharToken(state, "fL") || ParseTwoCharToken(state, "fR")) &&
+ ParseOperatorName(state, nullptr) && ParseExpression(state) &&
+ ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // tw <expression>: throw e
+ if (ParseTwoCharToken(state, "tw") && ParseExpression(state)) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // tr: throw (rethrows an exception from the handler that caught it)
+ if (ParseTwoCharToken(state, "tr")) return true;
+
// Object and pointer member access expressions.
+ //
+ // <expression> ::= (dt | pt) <expression> <unresolved-name>
if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
- ParseExpression(state) && ParseType(state)) {
+ ParseExpression(state) && ParseUnresolvedName(state)) {
return true;
}
state->parse_state = copy;
@@ -1774,9 +2541,61 @@ static bool ParseExpression(State *state) {
}
state->parse_state = copy;
+ // Vendor extended expressions
+ if (ParseOneCharToken(state, 'u') && ParseSourceName(state) &&
+ ZeroOrMore(ParseTemplateArg, state) && ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= rq <requirement>+ E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+ if (ParseTwoCharToken(state, "rq") && OneOrMore(ParseRequirement, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
+ // <expression> ::= rQ <bare-function-type> _ <requirement>+ E
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+ if (ParseTwoCharToken(state, "rQ") && ParseBareFunctionType(state) &&
+ ParseOneCharToken(state, '_') && OneOrMore(ParseRequirement, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ state->parse_state = copy;
+
return ParseUnresolvedName(state);
}
+// <initializer> ::= pi <expression>* E
+// ::= il <braced-expression>* E
+//
+// The il ... E form is not in the ABI spec but is seen in practice for
+// braced-init-lists in new-expressions, which are standard syntax from C++11
+// on.
+static bool ParseInitializer(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // Snapshot restored after each failed alternative.
+
+ if (ParseTwoCharToken(state, "pi") && ZeroOrMore(ParseExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;  // ::= pi <expression>* E
+ }
+ state->parse_state = copy;
+
+ if (ParseTwoCharToken(state, "il") &&
+ ZeroOrMore(ParseBracedExpression, state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;  // ::= il <braced-expression>* E (not in the ABI spec)
+ }
+ state->parse_state = copy;  // Neither form matched; rewind.
+ return false;
+}
+
// <expr-primary> ::= L <type> <(value) number> E
// ::= L <type> <(value) float> E
// ::= L <mangled-name> E
@@ -1819,10 +2638,35 @@ static bool ParseExprPrimary(State *state) {
return false;
}
- // The merged cast production.
- if (ParseOneCharToken(state, 'L') && ParseType(state) &&
- ParseExprCastValue(state)) {
- return true;
+ if (ParseOneCharToken(state, 'L')) {
+ // There are two special cases in which a literal may or must contain a type
+ // without a value. The first is that both LDnE and LDn0E are valid
+ // encodings of nullptr, used in different situations. Recognize LDnE here,
+ // leaving LDn0E to be recognized by the general logic afterward.
+ if (ParseThreeCharToken(state, "DnE")) return true;
+
+ // The second special case is a string literal, currently mangled in C++98
+ // style as LA<length + 1>_KcE. This is inadequate to support C++11 and
+ // later versions, and the discussion of this problem has not converged.
+ //
+ // https://github.com/itanium-cxx-abi/cxx-abi/issues/64
+ //
+ // For now the bare-type mangling is what's used in practice, so we
+ // recognize this form and only this form if an array type appears here.
+ // Someday we'll probably have to accept a new form of value mangling in
+ // LA...E constructs. (Note also that C++20 allows a wide range of
+ // class-type objects as template arguments, so someday their values will be
+ // mangled and we'll have to recognize them here too.)
+ if (RemainingInput(state)[0] == 'A' /* an array type follows */) {
+ if (ParseType(state) && ParseOneCharToken(state, 'E')) return true;
+ state->parse_state = copy;
+ return false;
+ }
+
+ // The merged cast production.
+ if (ParseType(state) && ParseExprCastValueAndTrailingE(state)) {
+ return true;
+ }
}
state->parse_state = copy;
@@ -1836,7 +2680,7 @@ static bool ParseExprPrimary(State *state) {
}
// <number> or <float>, followed by 'E', as described above ParseExprPrimary.
-static bool ParseExprCastValue(State *state) {
+static bool ParseExprCastValueAndTrailingE(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
// We have to be able to backtrack after accepting a number because we could
@@ -1848,39 +2692,148 @@ static bool ParseExprCastValue(State *state) {
}
state->parse_state = copy;
- if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
+ if (ParseFloatNumber(state)) {
+ // <float> for ordinary floating-point types
+ if (ParseOneCharToken(state, 'E')) return true;
+
+ // <float> _ <float> for complex floating-point types
+ if (ParseOneCharToken(state, '_') && ParseFloatNumber(state) &&
+ ParseOneCharToken(state, 'E')) {
+ return true;
+ }
+ }
+ state->parse_state = copy;
+
+ return false;
+}
+
+// Parses `Q <requires-clause expr>`.
+// If parsing fails, applies backtracking to `state`.
+//
+// This function covers two symbols instead of one for convenience,
+// because in LLVM's Itanium ABI mangling grammar, <requires-clause expr>
+// always appears after Q.
+//
+// Does not emit the parsed `requires` clause to simplify the implementation.
+// In other words, these two functions' mangled names will demangle identically:
+//
+// template <typename T>
+// int foo(T) requires IsIntegral<T>;
+//
+// vs.
+//
+// template <typename T>
+// int foo(T);
+static bool ParseQRequiresClauseExpr(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // Snapshot, including the append setting.
+ DisableAppend(state);  // The clause is parsed but not written to the output.
+
+ // <requires-clause expr> is just an <expression>: http://shortn/_9E1Ul0rIM8
+ if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) {
+ RestoreAppend(state, copy.append);  // Put the append setting back as it was on entry.
+ return true;
+ }
+
+ // Restoring the whole parse state also restores the append setting.
+ state->parse_state = copy;
+ return false;
+}
+
+// <requirement> ::= X <expression> [N] [R <type-constraint>]
+// <requirement> ::= T <type>
+// <requirement> ::= Q <constraint-expression>
+//
+// <constraint-expression> ::= <expression>
+//
+// https://github.com/itanium-cxx-abi/cxx-abi/issues/24
+static bool ParseRequirement(State *state) {
+ ComplexityGuard guard(state);
+ if (guard.IsTooComplex()) return false;
+
+ ParseState copy = state->parse_state;  // Snapshot restored after each failed alternative.
+
+ if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
+ Optional(ParseOneCharToken(state, 'N')) &&
+ // This condition holds when no R follows, or when an R is followed by a
+ // valid <type-constraint>; an R with nothing valid after it fails here.
+ (!ParseOneCharToken(state, 'R') || ParseTypeConstraint(state))) {
return true;
}
state->parse_state = copy;
+ if (ParseOneCharToken(state, 'T') && ParseType(state)) return true;  // ::= T <type>
+ state->parse_state = copy;
+
+ if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) return true;  // ::= Q <constraint-expression>
+ state->parse_state = copy;
+
return false;
}
+// <type-constraint> ::= <name>
+static bool ParseTypeConstraint(State *state) {
+ return ParseName(state);  // A <type-constraint> is parsed as a plain <name>.
+}
+
// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
// ::= Z <(function) encoding> E s [<discriminator>]
+// ::= Z <(function) encoding> E d [<(parameter) number>] _ <name>
//
// Parsing a common prefix of these two productions together avoids an
// exponential blowup of backtracking. Parse like:
// <local-name> := Z <encoding> E <local-name-suffix>
// <local-name-suffix> ::= s [<discriminator>]
+// ::= d [<(parameter) number>] _ <name>
// ::= <name> [<discriminator>]
static bool ParseLocalNameSuffix(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
+ ParseState copy = state->parse_state;  // Snapshot of both input and output positions.
+
+ // <local-name-suffix> ::= d [<(parameter) number>] _ <name>
+ if (ParseOneCharToken(state, 'd') &&
+ (IsDigit(RemainingInput(state)[0]) || RemainingInput(state)[0] == '_')) {
+ int number = -1;  // Stays -1 when the optional <number> is absent.
+ Optional(ParseNumber(state, &number));
+ if (number < -1 || number > 2147483645) {  // Upper bound keeps number + 2 within a 32-bit int.
+ // Work around overflow cases. We do not expect these outside of a fuzzer
+ // or other source of adversarial input. If we do detect overflow here,
+ // we'll print {default arg#1}.
+ number = -1;
+ }
+ number += 2;  // An absent number is shown as 1; an explicit N is shown as N + 2.
+
+ // The ::{default arg#1}:: infix must be rendered before the lambda itself,
+ // so print this before parsing the rest of the <local-name-suffix>.
+ MaybeAppend(state, "::{default arg#");
+ MaybeAppendDecimal(state, number);
+ MaybeAppend(state, "}::");
+ if (ParseOneCharToken(state, '_') && ParseName(state)) return true;
+
+ // On late parse failure, roll back not only the input but also the output,
+ // whose trailing NUL was overwritten.
+ state->parse_state = copy;
+ if (state->parse_state.append) {
+ state->out[state->parse_state.out_cur_idx] = '\0';  // Re-terminate at the restored output position.
+ }
+ return false;
+ }
+ state->parse_state = copy;  // Not the 'd' form; rewind anything consumed by the check above.
+ // <local-name-suffix> ::= <name> [<discriminator>]
if (MaybeAppend(state, "::") && ParseName(state) &&
Optional(ParseDiscriminator(state))) {
return true;
}
-
- // Since we're not going to overwrite the above "::" by re-parsing the
- // <encoding> (whose trailing '\0' byte was in the byte now holding the
- // first ':'), we have to rollback the "::" if the <name> parse failed.
+ state->parse_state = copy;  // Rewind input and output position before trying the 's' form.
if (state->parse_state.append) {
- state->out[state->parse_state.out_cur_idx - 2] = '\0';
+ state->out[state->parse_state.out_cur_idx] = '\0';  // Re-terminate at the restored position, dropping the appended "::".
}
+ // <local-name-suffix> ::= s [<discriminator>]
return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
}
@@ -1896,12 +2849,22 @@ static bool ParseLocalName(State *state) {
return false;
}
-// <discriminator> := _ <(non-negative) number>
+// <discriminator> := _ <digit>
+// := __ <number (>= 10)> _
static bool ParseDiscriminator(State *state) {
ComplexityGuard guard(state);
if (guard.IsTooComplex()) return false;
ParseState copy = state->parse_state;
- if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
+
+ // Both forms start with _ so parse that first.
+ if (!ParseOneCharToken(state, '_')) return false;
+
+ // <digit>
+ if (ParseDigit(state, nullptr)) return true;
+
+ // _ <number> _
+ if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
+ ParseOneCharToken(state, '_')) {
return true;
}
state->parse_state = copy;
@@ -1947,6 +2910,7 @@ static bool ParseSubstitution(State *state, bool accept_std) {
MaybeAppend(state, p->real_name);
}
++state->parse_state.mangled_idx;
+ UpdateHighWaterMark(state);
return true;
}
}
@@ -1972,10 +2936,13 @@ static bool ParseTopLevelMangledName(State *state) {
MaybeAppend(state, RemainingInput(state));
return true;
}
+ ReportHighWaterMark(state);
return false; // Unconsumed suffix.
}
return true;
}
+
+ ReportHighWaterMark(state);
return false;
}
@@ -1985,6 +2952,10 @@ static bool Overflowed(const State *state) {
// The demangler entry point.
bool Demangle(const char* mangled, char* out, size_t out_size) {
+ if (mangled[0] == '_' && mangled[1] == 'R') {
+ return DemangleRustSymbolEncoding(mangled, out, out_size);
+ }
+
State state;
InitState(&state, mangled, out, out_size);
return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&