diff options
Diffstat (limited to 'absl/debugging/internal/demangle.cc')
-rw-r--r-- | absl/debugging/internal/demangle.cc | 1143 |
1 files changed, 1057 insertions, 86 deletions
diff --git a/absl/debugging/internal/demangle.cc b/absl/debugging/internal/demangle.cc index 381a2b50..caac7636 100644 --- a/absl/debugging/internal/demangle.cc +++ b/absl/debugging/internal/demangle.cc @@ -14,18 +14,19 @@ // For reference check out: // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling -// -// Note that we only have partial C++11 support yet. #include "absl/debugging/internal/demangle.h" +#include <cstddef> #include <cstdint> #include <cstdio> #include <cstdlib> +#include <cstring> #include <limits> #include <string> #include "absl/base/config.h" +#include "absl/debugging/internal/demangle_rust.h" #if ABSL_INTERNAL_HAS_CXA_DEMANGLE #include <cxxabi.h> @@ -44,14 +45,16 @@ typedef struct { // List of operators from Itanium C++ ABI. static const AbbrevPair kOperatorList[] = { - // New has special syntax (not currently supported). + // New has special syntax. {"nw", "new", 0}, {"na", "new[]", 0}, - // Works except that the 'gs' prefix is not supported. + // Special-cased elsewhere to support the optional gs prefix. {"dl", "delete", 1}, {"da", "delete[]", 1}, + {"aw", "co_await", 1}, + {"ps", "+", 1}, // "positive" {"ng", "-", 1}, // "negative" {"ad", "&", 1}, // "address-of" @@ -79,6 +82,7 @@ static const AbbrevPair kOperatorList[] = { {"rs", ">>", 2}, {"lS", "<<=", 2}, {"rS", ">>=", 2}, + {"ss", "<=>", 2}, {"eq", "==", 2}, {"ne", "!=", 2}, {"lt", "<", 2}, @@ -98,6 +102,7 @@ static const AbbrevPair kOperatorList[] = { {"qu", "?", 3}, {"st", "sizeof", 0}, // Special syntax {"sz", "sizeof", 1}, // Not a real operator name, but used in expressions. + {"sZ", "sizeof...", 0}, // Special syntax {nullptr, nullptr, 0}, }; @@ -187,9 +192,50 @@ typedef struct { int recursion_depth; // For stack exhaustion prevention. int steps; // Cap how much work we'll do, regardless of depth. ParseState parse_state; // Backtrackable state copied for most frames. + + // Conditionally compiled support for marking the position of the first + // construct Demangle couldn't parse. This preprocessor symbol is intended + // for use by Abseil demangler maintainers only; its behavior is not part of + // Abseil's public interface. +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK + int high_water_mark; // Input position where parsing failed. + bool too_complex; // True if any guard.IsTooComplex() call returned true. +#endif } State; namespace { + +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK +void UpdateHighWaterMark(State *state) { + if (state->high_water_mark < state->parse_state.mangled_idx) { + state->high_water_mark = state->parse_state.mangled_idx; + } +} + +void ReportHighWaterMark(State *state) { + // Write out the mangled name with the trouble point marked, provided that the + // output buffer is large enough and the mangled name did not hit a complexity + // limit (in which case the high water mark wouldn't point out an unparsable + // construct, only the point where a budget ran out). + const size_t input_length = std::strlen(state->mangled_begin); + if (input_length + 6 > static_cast<size_t>(state->out_end_idx) || + state->too_complex) { + if (state->out_end_idx > 0) state->out[0] = '\0'; + return; + } + const size_t high_water_mark = static_cast<size_t>(state->high_water_mark); + std::memcpy(state->out, state->mangled_begin, high_water_mark); + std::memcpy(state->out + high_water_mark, "--!--", 5); + std::memcpy(state->out + high_water_mark + 5, + state->mangled_begin + high_water_mark, + input_length - high_water_mark); + state->out[input_length + 5] = '\0'; +} +#else +void UpdateHighWaterMark(State *) {} +void ReportHighWaterMark(State *) {} +#endif + // Prevent deep recursion / stack exhaustion. // Also prevent unbounded handling of complex inputs. class ComplexityGuard { @@ -201,7 +247,7 @@ class ComplexityGuard { ~ComplexityGuard() { --state_->recursion_depth; } // 256 levels of recursion seems like a reasonable upper limit on depth. - // 128 is not enough to demagle synthetic tests from demangle_unittest.txt: + // 128 is not enough to demangle synthetic tests from demangle_unittest.txt: // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..." static constexpr int kRecursionDepthLimit = 256; @@ -222,8 +268,14 @@ class ComplexityGuard { static constexpr int kParseStepsLimit = 1 << 17; bool IsTooComplex() const { - return state_->recursion_depth > kRecursionDepthLimit || - state_->steps > kParseStepsLimit; + if (state_->recursion_depth > kRecursionDepthLimit || + state_->steps > kParseStepsLimit) { +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK + state_->too_complex = true; +#endif + return true; + } + return false; } private: @@ -270,6 +322,10 @@ static void InitState(State* state, state->out_end_idx = static_cast<int>(out_size); state->recursion_depth = 0; state->steps = 0; +#ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK + state->high_water_mark = 0; + state->too_complex = false; +#endif state->parse_state.mangled_idx = 0; state->parse_state.out_cur_idx = 0; @@ -291,13 +347,14 @@ static bool ParseOneCharToken(State *state, const char one_char_token) { if (guard.IsTooComplex()) return false; if (RemainingInput(state)[0] == one_char_token) { ++state->parse_state.mangled_idx; + UpdateHighWaterMark(state); return true; } return false; } -// Returns true and advances "mangled_cur" if we find "two_char_token" -// at "mangled_cur" position. It is assumed that "two_char_token" does +// Returns true and advances "mangled_idx" if we find "two_char_token" +// at "mangled_idx" position. It is assumed that "two_char_token" does // not contain '\0'. static bool ParseTwoCharToken(State *state, const char *two_char_token) { ComplexityGuard guard(state); @@ -305,11 +362,45 @@ static bool ParseTwoCharToken(State *state, const char *two_char_token) { if (RemainingInput(state)[0] == two_char_token[0] && RemainingInput(state)[1] == two_char_token[1]) { state->parse_state.mangled_idx += 2; + UpdateHighWaterMark(state); return true; } return false; } +// Returns true and advances "mangled_idx" if we find "three_char_token" +// at "mangled_idx" position. It is assumed that "three_char_token" does +// not contain '\0'. +static bool ParseThreeCharToken(State *state, const char *three_char_token) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (RemainingInput(state)[0] == three_char_token[0] && + RemainingInput(state)[1] == three_char_token[1] && + RemainingInput(state)[2] == three_char_token[2]) { + state->parse_state.mangled_idx += 3; + UpdateHighWaterMark(state); + return true; + } + return false; +} + +// Returns true and advances "mangled_idx" if we find a copy of the +// NUL-terminated string "long_token" at "mangled_idx" position. +static bool ParseLongToken(State *state, const char *long_token) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + int i = 0; + for (; long_token[i] != '\0'; ++i) { + // Note that we cannot run off the end of the NUL-terminated input here. + // Inside the loop body, long_token[i] is known to be different from NUL. + // So if we read the NUL on the end of the input here, we return at once. + if (RemainingInput(state)[i] != long_token[i]) return false; + } + state->parse_state.mangled_idx += i; + UpdateHighWaterMark(state); + return true; +} + // Returns true and advances "mangled_cur" if we find any character in // "char_class" at "mangled_cur" position. static bool ParseCharClass(State *state, const char *char_class) { @@ -322,6 +413,7 @@ static bool ParseCharClass(State *state, const char *char_class) { for (; *p != '\0'; ++p) { if (RemainingInput(state)[0] == *p) { ++state->parse_state.mangled_idx; + UpdateHighWaterMark(state); return true; } } @@ -554,6 +646,7 @@ static bool ParseFloatNumber(State *state); static bool ParseSeqId(State *state); static bool ParseIdentifier(State *state, size_t length); static bool ParseOperatorName(State *state, int *arity); +static bool ParseConversionOperatorType(State *state); static bool ParseSpecialName(State *state); static bool ParseCallOffset(State *state); static bool ParseNVOffset(State *state); @@ -563,21 +656,33 @@ static bool ParseCtorDtorName(State *state); static bool ParseDecltype(State *state); static bool ParseType(State *state); static bool ParseCVQualifiers(State *state); +static bool ParseExtendedQualifier(State *state); static bool ParseBuiltinType(State *state); +static bool ParseVendorExtendedType(State *state); static bool ParseFunctionType(State *state); static bool ParseBareFunctionType(State *state); +static bool ParseOverloadAttribute(State *state); static bool ParseClassEnumType(State *state); static bool ParseArrayType(State *state); static bool ParsePointerToMemberType(State *state); static bool ParseTemplateParam(State *state); +static bool ParseTemplateParamDecl(State *state); static bool ParseTemplateTemplateParam(State *state); static bool ParseTemplateArgs(State *state); static bool ParseTemplateArg(State *state); static bool ParseBaseUnresolvedName(State *state); static bool ParseUnresolvedName(State *state); +static bool ParseUnresolvedQualifierLevel(State *state); +static bool ParseUnionSelector(State* state); +static bool ParseFunctionParam(State* state); +static bool ParseBracedExpression(State *state); static bool ParseExpression(State *state); +static bool ParseInitializer(State *state); static bool ParseExprPrimary(State *state); -static bool ParseExprCastValue(State *state); +static bool ParseExprCastValueAndTrailingE(State *state); +static bool ParseQRequiresClauseExpr(State *state); +static bool ParseRequirement(State *state); +static bool ParseTypeConstraint(State *state); static bool ParseLocalName(State *state); static bool ParseLocalNameSuffix(State *state); static bool ParseDiscriminator(State *state); @@ -622,22 +727,34 @@ static bool ParseMangledName(State *state) { } // <encoding> ::= <(function) name> <bare-function-type> +// [`Q` <requires-clause expr>] // ::= <(data) name> // ::= <special-name> +// +// NOTE: Based on http://shortn/_Hoq9qG83rx static bool ParseEncoding(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; - // Implementing the first two productions together as <name> - // [<bare-function-type>] avoids exponential blowup of backtracking. + // Since the first two productions both start with <name>, attempt + // to parse it only once to avoid exponential blowup of backtracking. // - // Since Optional(...) can't fail, there's no need to copy the state for - // backtracking. - if (ParseName(state) && Optional(ParseBareFunctionType(state))) { + // We're careful about exponential blowup because <encoding> recursively + // appears in other productions downstream of its first two productions, + // which means that every call to `ParseName` would possibly indirectly + // result in two calls to `ParseName` etc. + if (ParseName(state)) { + if (!ParseBareFunctionType(state)) { + return true; // <(data) name> + } + + // Parsed: <(function) name> <bare-function-type> + // Pending: [`Q` <requires-clause expr>] + ParseQRequiresClauseExpr(state); // restores state on failure return true; } if (ParseSpecialName(state)) { - return true; + return true; // <special-name> } return false; } @@ -723,19 +840,26 @@ static bool ParseNestedName(State *state) { // <prefix> ::= <prefix> <unqualified-name> // ::= <template-prefix> <template-args> // ::= <template-param> +// ::= <decltype> // ::= <substitution> // ::= # empty // <template-prefix> ::= <prefix> <(template) unqualified-name> // ::= <template-param> // ::= <substitution> +// ::= <vendor-extended-type> static bool ParsePrefix(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; bool has_something = false; while (true) { MaybeAppendSeparator(state); - if (ParseTemplateParam(state) || + if (ParseTemplateParam(state) || ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/true) || + // Although the official grammar does not mention it, nested-names + // shaped like Nu14__some_builtinIiE6memberE occur in practice, and it + // is not clear what else a compiler is supposed to do when a + // vendor-extended type has named members. + ParseVendorExtendedType(state) || ParseUnscopedName(state) || (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) { has_something = true; @@ -757,8 +881,14 @@ static bool ParsePrefix(State *state) { // ::= <source-name> [<abi-tags>] // ::= <local-source-name> [<abi-tags>] // ::= <unnamed-type-name> [<abi-tags>] +// ::= DC <source-name>+ E # C++17 structured binding +// ::= F <source-name> # C++20 constrained friend +// ::= F <operator-name> # C++20 constrained friend // // <local-source-name> is a GCC extension; see below. +// +// For the F notation for constrained friends, see +// https://github.com/itanium-cxx-abi/cxx-abi/issues/24#issuecomment-1491130332. static bool ParseUnqualifiedName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -767,6 +897,23 @@ static bool ParseUnqualifiedName(State *state) { ParseUnnamedTypeName(state)) { return ParseAbiTags(state); } + + // DC <source-name>+ E + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "DC") && OneOrMore(ParseSourceName, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // F <source-name> + // F <operator-name> + if (ParseOneCharToken(state, 'F') && MaybeAppend(state, "friend ") && + (ParseSourceName(state) || ParseOperatorName(state, nullptr))) { + return true; + } + state->parse_state = copy; + return false; } @@ -824,7 +971,11 @@ static bool ParseLocalSourceName(State *state) { // <unnamed-type-name> ::= Ut [<(nonnegative) number>] _ // ::= <closure-type-name> // <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _ -// <lambda-sig> ::= <(parameter) type>+ +// <lambda-sig> ::= <template-param-decl>* <(parameter) type>+ +// +// For <template-param-decl>* in <lambda-sig> see: +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/31 static bool ParseUnnamedTypeName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -847,6 +998,7 @@ static bool ParseUnnamedTypeName(State *state) { // Closure type. which = -1; if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) && + ZeroOrMore(ParseTemplateParamDecl, state) && OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) && ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) && which <= std::numeric_limits<int>::max() - 2 && // Don't overflow. @@ -888,6 +1040,7 @@ static bool ParseNumber(State *state, int *number_out) { } if (p != RemainingInput(state)) { // Conversion succeeded. state->parse_state.mangled_idx += p - RemainingInput(state); + UpdateHighWaterMark(state); if (number_out != nullptr) { // Note: possibly truncate "number". *number_out = static_cast<int>(number); @@ -910,6 +1063,7 @@ static bool ParseFloatNumber(State *state) { } if (p != RemainingInput(state)) { // Conversion succeeded. state->parse_state.mangled_idx += p - RemainingInput(state); + UpdateHighWaterMark(state); return true; } return false; @@ -928,6 +1082,7 @@ static bool ParseSeqId(State *state) { } if (p != RemainingInput(state)) { // Conversion succeeded. state->parse_state.mangled_idx += p - RemainingInput(state); + UpdateHighWaterMark(state); return true; } return false; @@ -946,11 +1101,13 @@ static bool ParseIdentifier(State *state, size_t length) { MaybeAppendWithLength(state, RemainingInput(state), length); } state->parse_state.mangled_idx += length; + UpdateHighWaterMark(state); return true; } // <operator-name> ::= nw, and other two letters cases // ::= cv <type> # (cast) +// ::= li <source-name> # C++11 user-defined literal // ::= v <digit> <source-name> # vendor extended operator static bool ParseOperatorName(State *state, int *arity) { ComplexityGuard guard(state); @@ -961,7 +1118,7 @@ static bool ParseOperatorName(State *state, int *arity) { // First check with "cv" (cast) case. ParseState copy = state->parse_state; if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") && - EnterNestedName(state) && ParseType(state) && + EnterNestedName(state) && ParseConversionOperatorType(state) && LeaveNestedName(state, copy.nest_level)) { if (arity != nullptr) { *arity = 1; @@ -970,6 +1127,13 @@ static bool ParseOperatorName(State *state, int *arity) { } state->parse_state = copy; + // Then user-defined literals. + if (ParseTwoCharToken(state, "li") && MaybeAppend(state, "operator\"\" ") && + ParseSourceName(state)) { + return true; + } + state->parse_state = copy; + // Then vendor extended operators. if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) && ParseSourceName(state)) { @@ -997,36 +1161,120 @@ static bool ParseOperatorName(State *state, int *arity) { } MaybeAppend(state, p->real_name); state->parse_state.mangled_idx += 2; + UpdateHighWaterMark(state); return true; } } return false; } +// <operator-name> ::= cv <type> # (cast) +// +// The name of a conversion operator is the one place where cv-qualifiers, *, &, +// and other simple type combinators are expected to appear in our stripped-down +// demangling (elsewhere they appear in function signatures or template +// arguments, which we omit from the output). We make reasonable efforts to +// render simple cases accurately. +static bool ParseConversionOperatorType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + // Scan pointers, const, and other easy mangling prefixes with postfix + // demanglings. Remember the range of input for later rescanning. + // + // See `ParseType` and the `switch` below for the meaning of each char. + const char* begin_simple_prefixes = RemainingInput(state); + while (ParseCharClass(state, "OPRCGrVK")) {} + const char* end_simple_prefixes = RemainingInput(state); + + // Emit the base type first. + if (!ParseType(state)) { + state->parse_state = copy; + return false; + } + + // Then rescan the easy type combinators in reverse order to emit their + // demanglings in the expected output order. + while (begin_simple_prefixes != end_simple_prefixes) { + switch (*--end_simple_prefixes) { + case 'P': + MaybeAppend(state, "*"); + break; + case 'R': + MaybeAppend(state, "&"); + break; + case 'O': + MaybeAppend(state, "&&"); + break; + case 'C': + MaybeAppend(state, " _Complex"); + break; + case 'G': + MaybeAppend(state, " _Imaginary"); + break; + case 'r': + MaybeAppend(state, " restrict"); + break; + case 'V': + MaybeAppend(state, " volatile"); + break; + case 'K': + MaybeAppend(state, " const"); + break; + } + } + return true; +} + // <special-name> ::= TV <type> // ::= TT <type> // ::= TI <type> // ::= TS <type> -// ::= TH <type> # thread-local +// ::= TW <name> # thread-local wrapper +// ::= TH <name> # thread-local initialization // ::= Tc <call-offset> <call-offset> <(base) encoding> // ::= GV <(object) name> +// ::= GR <(object) name> [<seq-id>] _ // ::= T <call-offset> <(base) encoding> +// ::= GTt <encoding> # transaction-safe entry point +// ::= TA <template-arg> # nontype template parameter object // G++ extensions: // ::= TC <type> <(offset) number> _ <(base) type> // ::= TF <type> // ::= TJ <type> -// ::= GR <name> +// ::= GR <name> # without final _, perhaps an earlier form? // ::= GA <encoding> // ::= Th <call-offset> <(base) encoding> // ::= Tv <call-offset> <(base) encoding> // -// Note: we don't care much about them since they don't appear in -// stack traces. The are special data. +// Note: Most of these are special data, not functions that occur in stack +// traces. Exceptions are TW and TH, which denote functions supporting the +// thread_local feature. For these see: +// +// https://maskray.me/blog/2021-02-14-all-about-thread-local-storage +// +// For TA see https://github.com/itanium-cxx-abi/cxx-abi/issues/63. static bool ParseSpecialName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; - if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") && + + if (ParseTwoCharToken(state, "TW")) { + MaybeAppend(state, "thread-local wrapper routine for "); + if (ParseName(state)) return true; + state->parse_state = copy; + return false; + } + + if (ParseTwoCharToken(state, "TH")) { + MaybeAppend(state, "thread-local initialization routine for "); + if (ParseName(state)) return true; + state->parse_state = copy; + return false; + } + + if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTIS") && ParseType(state)) { return true; } @@ -1064,21 +1312,51 @@ static bool ParseSpecialName(State *state) { } state->parse_state = copy; - if (ParseTwoCharToken(state, "GR") && ParseName(state)) { + // <special-name> ::= GR <(object) name> [<seq-id>] _ # modern standard + // ::= GR <(object) name> # also recognized + if (ParseTwoCharToken(state, "GR")) { + MaybeAppend(state, "reference temporary for "); + if (!ParseName(state)) { + state->parse_state = copy; + return false; + } + const bool has_seq_id = ParseSeqId(state); + const bool has_underscore = ParseOneCharToken(state, '_'); + if (has_seq_id && !has_underscore) { + state->parse_state = copy; + return false; + } return true; } - state->parse_state = copy; if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) { return true; } state->parse_state = copy; + if (ParseThreeCharToken(state, "GTt") && + MaybeAppend(state, "transaction clone for ") && ParseEncoding(state)) { + return true; + } + state->parse_state = copy; + if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") && ParseCallOffset(state) && ParseEncoding(state)) { return true; } state->parse_state = copy; + + if (ParseTwoCharToken(state, "TA")) { + bool append = state->parse_state.append; + DisableAppend(state); + if (ParseTemplateArg(state)) { + RestoreAppend(state, append); + MaybeAppend(state, "template parameter object"); + return true; + } + } + state->parse_state = copy; + return false; } @@ -1182,7 +1460,6 @@ static bool ParseDecltype(State *state) { // ::= O <type> # rvalue reference-to (C++0x) // ::= C <type> # complex pair (C 2000) // ::= G <type> # imaginary (C 2000) -// ::= U <source-name> <type> # vendor extended type qualifier // ::= <builtin-type> // ::= <function-type> // ::= <class-enum-type> # note: just an alias for <name> @@ -1193,7 +1470,9 @@ static bool ParseDecltype(State *state) { // ::= <decltype> // ::= <substitution> // ::= Dp <type> # pack expansion of (C++0x) -// ::= Dv <num-elems> _ # GNU vector extension +// ::= Dv <(elements) number> _ <type> # GNU vector extension +// ::= Dv <(bytes) expression> _ <type> +// ::= Dk <type-constraint> # constrained auto // static bool ParseType(State *state) { ComplexityGuard guard(state); @@ -1236,12 +1515,6 @@ static bool ParseType(State *state) { } state->parse_state = copy; - if (ParseOneCharToken(state, 'U') && ParseSourceName(state) && - ParseType(state)) { - return true; - } - state->parse_state = copy; - if (ParseBuiltinType(state) || ParseFunctionType(state) || ParseClassEnumType(state) || ParseArrayType(state) || ParsePointerToMemberType(state) || ParseDecltype(state) || @@ -1260,54 +1533,160 @@ static bool ParseType(State *state) { return true; } + // GNU vector extension Dv <number> _ <type> if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) && - ParseOneCharToken(state, '_')) { + ParseOneCharToken(state, '_') && ParseType(state)) { return true; } state->parse_state = copy; - return false; + // GNU vector extension Dv <expression> _ <type> + if (ParseTwoCharToken(state, "Dv") && ParseExpression(state) && + ParseOneCharToken(state, '_') && ParseType(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Dk") && ParseTypeConstraint(state)) { + return true; + } + state->parse_state = copy; + + // For this notation see CXXNameMangler::mangleType in Clang's source code. + // The relevant logic and its comment "not clear how to mangle this!" date + // from 2011, so it may be with us awhile. + return ParseLongToken(state, "_SUBSTPACK_"); } +// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers> // <CV-qualifiers> ::= [r] [V] [K] +// // We don't allow empty <CV-qualifiers> to avoid infinite loop in // ParseType(). static bool ParseCVQualifiers(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; int num_cv_qualifiers = 0; + while (ParseExtendedQualifier(state)) ++num_cv_qualifiers; num_cv_qualifiers += ParseOneCharToken(state, 'r'); num_cv_qualifiers += ParseOneCharToken(state, 'V'); num_cv_qualifiers += ParseOneCharToken(state, 'K'); return num_cv_qualifiers > 0; } +// <extended-qualifier> ::= U <source-name> [<template-args>] +static bool ParseExtendedQualifier(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + if (!ParseOneCharToken(state, 'U')) return false; + + bool append = state->parse_state.append; + DisableAppend(state); + if (!ParseSourceName(state)) { + state->parse_state = copy; + return false; + } + Optional(ParseTemplateArgs(state)); + RestoreAppend(state, append); + return true; +} + // <builtin-type> ::= v, etc. # single-character builtin types -// ::= u <source-name> +// ::= <vendor-extended-type> // ::= Dd, etc. # two-character builtin types +// ::= DB (<number> | <expression>) _ # _BitInt(N) +// ::= DU (<number> | <expression>) _ # unsigned _BitInt(N) +// ::= DF <number> _ # _FloatN (N bits) +// ::= DF <number> x # _FloatNx +// ::= DF16b # std::bfloat16_t // // Not supported: -// ::= DF <number> _ # _FloatN (N bits) -// +// ::= [DS] DA <fixed-point-size> +// ::= [DS] DR <fixed-point-size> +// because real implementations of N1169 fixed-point are scant. static bool ParseBuiltinType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; - const AbbrevPair *p; - for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) { + ParseState copy = state->parse_state; + + // DB (<number> | <expression>) _ # _BitInt(N) + // DU (<number> | <expression>) _ # unsigned _BitInt(N) + if (ParseTwoCharToken(state, "DB") || + (ParseTwoCharToken(state, "DU") && MaybeAppend(state, "unsigned "))) { + bool append = state->parse_state.append; + DisableAppend(state); + int number = -1; + if (!ParseNumber(state, &number) && !ParseExpression(state)) { + state->parse_state = copy; + return false; + } + RestoreAppend(state, append); + + if (!ParseOneCharToken(state, '_')) { + state->parse_state = copy; + return false; + } + + MaybeAppend(state, "_BitInt("); + if (number >= 0) { + MaybeAppendDecimal(state, number); + } else { + MaybeAppend(state, "?"); // the best we can do for dependent sizes + } + MaybeAppend(state, ")"); + return true; + } + + // DF <number> _ # _FloatN + // DF <number> x # _FloatNx + // DF16b # std::bfloat16_t + if (ParseTwoCharToken(state, "DF")) { + if (ParseThreeCharToken(state, "16b")) { + MaybeAppend(state, "std::bfloat16_t"); + return true; + } + int number = 0; + if (!ParseNumber(state, &number)) { + state->parse_state = copy; + return false; + } + MaybeAppend(state, "_Float"); + MaybeAppendDecimal(state, number); + if (ParseOneCharToken(state, 'x')) { + MaybeAppend(state, "x"); + return true; + } + if (ParseOneCharToken(state, '_')) return true; + state->parse_state = copy; + return false; + } + + for (const AbbrevPair *p = kBuiltinTypeList; p->abbrev != nullptr; ++p) { // Guaranteed only 1- or 2-character strings in kBuiltinTypeList. if (p->abbrev[1] == '\0') { if (ParseOneCharToken(state, p->abbrev[0])) { MaybeAppend(state, p->real_name); - return true; + return true; // ::= v, etc. # single-character builtin types } } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) { MaybeAppend(state, p->real_name); - return true; + return true; // ::= Dd, etc. # two-character builtin types } } + return ParseVendorExtendedType(state); +} + +// <vendor-extended-type> ::= u <source-name> [<template-args>] +static bool ParseVendorExtendedType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; - if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) { + if (ParseOneCharToken(state, 'u') && ParseSourceName(state) && + Optional(ParseTemplateArgs(state))) { return true; } state->parse_state = copy; @@ -1342,28 +1721,44 @@ static bool ParseExceptionSpec(State *state) { return false; } -// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E +// <function-type> ::= +// [exception-spec] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E +// +// <ref-qualifier> ::= R | O static bool ParseFunctionType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; - if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') && - Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) && - Optional(ParseOneCharToken(state, 'O')) && - ParseOneCharToken(state, 'E')) { - return true; + Optional(ParseExceptionSpec(state)); + Optional(ParseTwoCharToken(state, "Dx")); + if (!ParseOneCharToken(state, 'F')) { + state->parse_state = copy; + return false; } - state->parse_state = copy; - return false; + Optional(ParseOneCharToken(state, 'Y')); + if (!ParseBareFunctionType(state)) { + state->parse_state = copy; + return false; + } + Optional(ParseCharClass(state, "RO")); + if (!ParseOneCharToken(state, 'E')) { + state->parse_state = copy; + return false; + } + return true; } -// <bare-function-type> ::= <(signature) type>+ +// <bare-function-type> ::= <overload-attribute>* <(signature) type>+ +// +// The <overload-attribute>* prefix is nonstandard; see the comment on +// ParseOverloadAttribute. static bool ParseBareFunctionType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; DisableAppend(state); - if (OneOrMore(ParseType, state)) { + if (ZeroOrMore(ParseOverloadAttribute, state) && + OneOrMore(ParseType, state)) { RestoreAppend(state, copy.append); MaybeAppend(state, "()"); return true; @@ -1372,11 +1767,43 @@ static bool ParseBareFunctionType(State *state) { return false; } +// <overload-attribute> ::= Ua <name> +// +// The nonstandard <overload-attribute> production is sufficient to accept the +// current implementation of __attribute__((enable_if(condition, "message"))) +// and future attributes of a similar shape. See +// https://clang.llvm.org/docs/AttributeReference.html#enable-if and the +// definition of CXXNameMangler::mangleFunctionEncodingBareType in Clang's +// source code. +static bool ParseOverloadAttribute(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "Ua") && ParseName(state)) { + return true; + } + state->parse_state = copy; + return false; +} + // <class-enum-type> ::= <name> +// ::= Ts <name> # struct Name or class Name +// ::= Tu <name> # union Name +// ::= Te <name> # enum Name +// +// See http://shortn/_W3YrltiEd0. static bool ParseClassEnumType(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; - return ParseName(state); + ParseState copy = state->parse_state; + if (Optional(ParseTwoCharToken(state, "Ts") || + ParseTwoCharToken(state, "Tu") || + ParseTwoCharToken(state, "Te")) && + ParseName(state)) { + return true; + } + state->parse_state = copy; + return false; } // <array-type> ::= A <(positive dimension) number> _ <(element) type> @@ -1413,21 +1840,83 @@ static bool ParsePointerToMemberType(State *state) { // <template-param> ::= T_ // ::= T <parameter-2 non-negative number> _ +// ::= TL <level-1> __ +// ::= TL <level-1> _ <parameter-2 non-negative number> _ static bool ParseTemplateParam(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; if (ParseTwoCharToken(state, "T_")) { MaybeAppend(state, "?"); // We don't support template substitutions. - return true; + return true; // ::= T_ } ParseState copy = state->parse_state; if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) && ParseOneCharToken(state, '_')) { MaybeAppend(state, "?"); // We don't support template substitutions. + return true; // ::= T <parameter-2 non-negative number> _ + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "TL") && ParseNumber(state, nullptr)) { + if (ParseTwoCharToken(state, "__")) { + MaybeAppend(state, "?"); // We don't support template substitutions. + return true; // ::= TL <level-1> __ + } + + if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) && + ParseOneCharToken(state, '_')) { + MaybeAppend(state, "?"); // We don't support template substitutions. + return true; // ::= TL <level-1> _ <parameter-2 non-negative number> _ + } + } + state->parse_state = copy; + return false; +} + +// <template-param-decl> +// ::= Ty # template type parameter +// ::= Tk <concept name> [<template-args>] # constrained type parameter +// ::= Tn <type> # template non-type parameter +// ::= Tt <template-param-decl>* E # template template parameter +// ::= Tp <template-param-decl> # template parameter pack +// +// NOTE: <concept name> is just a <name>: http://shortn/_MqJVyr0fc1 +// TODO(b/324066279): Implement optional suffix for `Tt`: +// [Q <requires-clause expr>] +static bool ParseTemplateParamDecl(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + if (ParseTwoCharToken(state, "Ty")) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tk") && ParseName(state) && + Optional(ParseTemplateArgs(state))) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tn") && ParseType(state)) { return true; } state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tt") && + ZeroOrMore(ParseTemplateParamDecl, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tp") && ParseTemplateParamDecl(state)) { + return true; + } + state->parse_state = copy; + return false; } @@ -1441,13 +1930,14 @@ static bool ParseTemplateTemplateParam(State *state) { ParseSubstitution(state, /*accept_std=*/false)); } -// <template-args> ::= I <template-arg>+ E +// <template-args> ::= I <template-arg>+ [Q <requires-clause expr>] E static bool ParseTemplateArgs(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; DisableAppend(state); if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) && + Optional(ParseQRequiresClauseExpr(state)) && ParseOneCharToken(state, 'E')) { RestoreAppend(state, copy.append); MaybeAppend(state, "<>"); @@ -1457,7 +1947,8 @@ static bool ParseTemplateArgs(State *state) { return false; } -// <template-arg> ::= <type> +// <template-arg> ::= <template-param-decl> <template-arg> +// ::= <type> // ::= <expr-primary> // ::= J <template-arg>* E # argument pack // ::= X <expression> E @@ -1541,7 +2032,7 @@ static bool ParseTemplateArg(State *state) { // ::= L <source-name> [<template-args>] [<expr-cast-value> E] if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) { copy = state->parse_state; - if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) { + if (ParseExprCastValueAndTrailingE(state)) { return true; } state->parse_state = copy; @@ -1560,6 +2051,12 @@ static bool ParseTemplateArg(State *state) { return true; } state->parse_state = copy; + + if (ParseTemplateParamDecl(state) && ParseTemplateArg(state)) { + return true; + } + state->parse_state = copy; + return false; } @@ -1614,6 +2111,13 @@ static bool ParseBaseUnresolvedName(State *state) { // <base-unresolved-name> // ::= [gs] sr <unresolved-qualifier-level>+ E // <base-unresolved-name> +// ::= sr St <simple-id> <simple-id> # nonstandard +// +// The last case is not part of the official grammar but has been observed in +// real-world examples that the GNU demangler (but not the LLVM demangler) is +// able to decode; see demangle_test.cc for one such symbol name. The shape +// sr St <simple-id> <simple-id> was inferred by closed-box testing of the GNU +// demangler. static bool ParseUnresolvedName(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -1633,7 +2137,7 @@ static bool ParseUnresolvedName(State *state) { if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') && ParseUnresolvedType(state) && - OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) && + OneOrMore(ParseUnresolvedQualifierLevel, state) && ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { return true; } @@ -1641,35 +2145,160 @@ static bool ParseUnresolvedName(State *state) { if (Optional(ParseTwoCharToken(state, "gs")) && ParseTwoCharToken(state, "sr") && - OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) && + OneOrMore(ParseUnresolvedQualifierLevel, state) && ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { return true; } state->parse_state = copy; + if (ParseTwoCharToken(state, "sr") && ParseTwoCharToken(state, "St") && + ParseSimpleId(state) && ParseSimpleId(state)) { + return true; + } + state->parse_state = copy; + return false; } +// <unresolved-qualifier-level> ::= <simple-id> +// ::= <substitution> <template-args> +// +// The production <substitution> <template-args> is nonstandard but is observed +// in practice. An upstream discussion on the best shape of <unresolved-name> +// has not converged: +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/38 +static bool ParseUnresolvedQualifierLevel(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + if (ParseSimpleId(state)) return true; + + ParseState copy = state->parse_state; + if (ParseSubstitution(state, /*accept_std=*/false) && + ParseTemplateArgs(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <union-selector> ::= _ [<number>] +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/47 +static bool ParseUnionSelector(State *state) { + return ParseOneCharToken(state, '_') && Optional(ParseNumber(state, nullptr)); +} + +// <function-param> ::= fp <(top-level) CV-qualifiers> _ +// ::= fp <(top-level) CV-qualifiers> <number> _ +// ::= fL <number> p <(top-level) CV-qualifiers> _ +// ::= fL <number> p <(top-level) CV-qualifiers> <number> _ +// ::= fpT # this +static bool ParseFunctionParam(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + + // Function-param expression (level 0). + if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) && + Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + // Function-param expression (level 1+). + if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) && + ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) && + Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + return ParseThreeCharToken(state, "fpT"); +} + +// <braced-expression> ::= <expression> +// ::= di <field source-name> <braced-expression> +// ::= dx <index expression> <braced-expression> +// ::= dX <expression> <expression> <braced-expression> +static bool ParseBracedExpression(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + + if (ParseTwoCharToken(state, "di") && ParseSourceName(state) && + ParseBracedExpression(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "dx") && ParseExpression(state) && + ParseBracedExpression(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "dX") && + ParseExpression(state) && ParseExpression(state) && + ParseBracedExpression(state)) { + return true; + } + state->parse_state = copy; + + return ParseExpression(state); +} + // <expression> ::= <1-ary operator-name> <expression> // ::= <2-ary operator-name> <expression> <expression> // ::= <3-ary operator-name> <expression> <expression> <expression> +// ::= pp_ <expression> # ++e; pp <expression> is e++ +// ::= mm_ <expression> # --e; mm <expression> is e-- // ::= cl <expression>+ E // ::= cp <simple-id> <expression>* E # Clang-specific. +// ::= so <type> <expression> [<number>] <union-selector>* [p] E // ::= cv <type> <expression> # type (expression) // ::= cv <type> _ <expression>* E # type (expr-list) +// ::= tl <type> <braced-expression>* E +// ::= il <braced-expression>* E +// ::= [gs] nw <expression>* _ <type> E +// ::= [gs] nw <expression>* _ <type> <initializer> +// ::= [gs] na <expression>* _ <type> E +// ::= [gs] na <expression>* _ <type> <initializer> +// ::= [gs] dl <expression> +// ::= [gs] da <expression> +// ::= dc <type> <expression> +// ::= sc <type> <expression> +// ::= cc <type> <expression> +// ::= rc <type> <expression> +// ::= ti <type> +// ::= te <expression> // ::= st <type> +// ::= at <type> +// ::= az <expression> +// ::= nx <expression> // ::= <template-param> // ::= <function-param> +// ::= sZ <template-param> +// ::= sZ <function-param> +// ::= sP <template-arg>* E // ::= <expr-primary> // ::= dt <expression> <unresolved-name> # expr.name // ::= pt <expression> <unresolved-name> # expr->name // ::= sp <expression> # argument pack expansion +// ::= fl <binary operator-name> <expression> +// ::= fr <binary operator-name> <expression> +// ::= fL <binary operator-name> <expression> <expression> +// ::= fR <binary operator-name> <expression> <expression> +// ::= tw <expression> +// ::= tr // ::= sr <type> <unqualified-name> <template-args> // ::= sr <type> <unqualified-name> -// <function-param> ::= fp <(top-level) CV-qualifiers> _ -// ::= fp <(top-level) CV-qualifiers> <number> _ -// ::= fL <number> p <(top-level) CV-qualifiers> _ -// ::= fL <number> p <(top-level) CV-qualifiers> <number> _ +// ::= u <source-name> <template-arg>* E # vendor extension +// ::= rq <requirement>+ E +// ::= rQ <bare-function-type> _ <requirement>+ E static bool ParseExpression(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; @@ -1686,6 +2315,15 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; + // Preincrement and predecrement. Postincrement and postdecrement are handled + // by the operator-name logic later on. + if ((ParseThreeCharToken(state, "pp_") || + ParseThreeCharToken(state, "mm_")) && + ParseExpression(state)) { + return true; + } + state->parse_state = copy; + // Clang-specific "cp <simple-id> <expression>* E" // https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338 if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) && @@ -1694,17 +2332,65 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; - // Function-param expression (level 0). - if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) && - Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + // <expression> ::= so <type> <expression> [<number>] <union-selector>* [p] E + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/47 + if (ParseTwoCharToken(state, "so") && ParseType(state) && + ParseExpression(state) && Optional(ParseNumber(state, nullptr)) && + ZeroOrMore(ParseUnionSelector, state) && + Optional(ParseOneCharToken(state, 'p')) && + ParseOneCharToken(state, 'E')) { return true; } state->parse_state = copy; - // Function-param expression (level 1+). - if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) && - ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) && - Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + // <expression> ::= <function-param> + if (ParseFunctionParam(state)) return true; + state->parse_state = copy; + + // <expression> ::= tl <type> <braced-expression>* E + if (ParseTwoCharToken(state, "tl") && ParseType(state) && + ZeroOrMore(ParseBracedExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= il <braced-expression>* E + if (ParseTwoCharToken(state, "il") && + ZeroOrMore(ParseBracedExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= [gs] nw <expression>* _ <type> E + // ::= [gs] nw <expression>* _ <type> <initializer> + // ::= [gs] na <expression>* _ <type> E + // ::= [gs] na <expression>* _ <type> <initializer> + if (Optional(ParseTwoCharToken(state, "gs")) && + (ParseTwoCharToken(state, "nw") || ParseTwoCharToken(state, "na")) && + ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, '_') && + ParseType(state) && + (ParseOneCharToken(state, 'E') || ParseInitializer(state))) { + return true; + } + state->parse_state = copy; + + // <expression> ::= [gs] dl <expression> + // ::= [gs] da <expression> + if (Optional(ParseTwoCharToken(state, "gs")) && + (ParseTwoCharToken(state, "dl") || ParseTwoCharToken(state, "da")) && + ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // dynamic_cast, static_cast, const_cast, reinterpret_cast. + // + // <expression> ::= (dc | sc | cc | rc) <type> <expression> + if (ParseCharClass(state, "dscr") && ParseOneCharToken(state, 'c') && + ParseType(state) && ParseExpression(state)) { return true; } state->parse_state = copy; @@ -1746,15 +2432,96 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; + // typeid(type) + if (ParseTwoCharToken(state, "ti") && ParseType(state)) { + return true; + } + state->parse_state = copy; + + // typeid(expression) + if (ParseTwoCharToken(state, "te") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + // sizeof type if (ParseTwoCharToken(state, "st") && ParseType(state)) { return true; } state->parse_state = copy; + // alignof(type) + if (ParseTwoCharToken(state, "at") && ParseType(state)) { + return true; + } + state->parse_state = copy; + + // alignof(expression), a GNU extension + if (ParseTwoCharToken(state, "az") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // noexcept(expression) appearing as an expression in a dependent signature + if (ParseTwoCharToken(state, "nx") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // sizeof...(pack) + // + // <expression> ::= sZ <template-param> + // ::= sZ <function-param> + if (ParseTwoCharToken(state, "sZ") && + (ParseFunctionParam(state) || ParseTemplateParam(state))) { + return true; + } + state->parse_state = copy; + + // sizeof...(pack) captured from an alias template + // + // <expression> ::= sP <template-arg>* E + if (ParseTwoCharToken(state, "sP") && ZeroOrMore(ParseTemplateArg, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // Unary folds (... op pack) and (pack op ...). + // + // <expression> ::= fl <binary operator-name> <expression> + // ::= fr <binary operator-name> <expression> + if ((ParseTwoCharToken(state, "fl") || ParseTwoCharToken(state, "fr")) && + ParseOperatorName(state, nullptr) && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // Binary folds (init op ... op pack) and (pack op ... op init). + // + // <expression> ::= fL <binary operator-name> <expression> <expression> + // ::= fR <binary operator-name> <expression> <expression> + if ((ParseTwoCharToken(state, "fL") || ParseTwoCharToken(state, "fR")) && + ParseOperatorName(state, nullptr) && ParseExpression(state) && + ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // tw <expression>: throw e + if (ParseTwoCharToken(state, "tw") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // tr: throw (rethrows an exception from the handler that caught it) + if (ParseTwoCharToken(state, "tr")) return true; + // Object and pointer member access expressions. + // + // <expression> ::= (dt | pt) <expression> <unresolved-name> if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) && - ParseExpression(state) && ParseType(state)) { + ParseExpression(state) && ParseUnresolvedName(state)) { return true; } state->parse_state = copy; @@ -1774,9 +2541,61 @@ static bool ParseExpression(State *state) { } state->parse_state = copy; + // Vendor extended expressions + if (ParseOneCharToken(state, 'u') && ParseSourceName(state) && + ZeroOrMore(ParseTemplateArg, state) && ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= rq <requirement>+ E + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/24 + if (ParseTwoCharToken(state, "rq") && OneOrMore(ParseRequirement, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // <expression> ::= rQ <bare-function-type> _ <requirement>+ E + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/24 + if (ParseTwoCharToken(state, "rQ") && ParseBareFunctionType(state) && + ParseOneCharToken(state, '_') && OneOrMore(ParseRequirement, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return ParseUnresolvedName(state); } +// <initializer> ::= pi <expression>* E +// ::= il <braced-expression>* E +// +// The il ... E form is not in the ABI spec but is seen in practice for +// braced-init-lists in new-expressions, which are standard syntax from C++11 +// on. +static bool ParseInitializer(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + if (ParseTwoCharToken(state, "pi") && ZeroOrMore(ParseExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "il") && + ZeroOrMore(ParseBracedExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return false; +} + // <expr-primary> ::= L <type> <(value) number> E // ::= L <type> <(value) float> E // ::= L <mangled-name> E @@ -1819,10 +2638,35 @@ static bool ParseExprPrimary(State *state) { return false; } - // The merged cast production. - if (ParseOneCharToken(state, 'L') && ParseType(state) && - ParseExprCastValue(state)) { - return true; + if (ParseOneCharToken(state, 'L')) { + // There are two special cases in which a literal may or must contain a type + // without a value. The first is that both LDnE and LDn0E are valid + // encodings of nullptr, used in different situations. Recognize LDnE here, + // leaving LDn0E to be recognized by the general logic afterward. + if (ParseThreeCharToken(state, "DnE")) return true; + + // The second special case is a string literal, currently mangled in C++98 + // style as LA<length + 1>_KcE. This is inadequate to support C++11 and + // later versions, and the discussion of this problem has not converged. + // + // https://github.com/itanium-cxx-abi/cxx-abi/issues/64 + // + // For now the bare-type mangling is what's used in practice, so we + // recognize this form and only this form if an array type appears here. + // Someday we'll probably have to accept a new form of value mangling in + // LA...E constructs. (Note also that C++20 allows a wide range of + // class-type objects as template arguments, so someday their values will be + // mangled and we'll have to recognize them here too.) + if (RemainingInput(state)[0] == 'A' /* an array type follows */) { + if (ParseType(state) && ParseOneCharToken(state, 'E')) return true; + state->parse_state = copy; + return false; + } + + // The merged cast production. + if (ParseType(state) && ParseExprCastValueAndTrailingE(state)) { + return true; + } } state->parse_state = copy; @@ -1836,7 +2680,7 @@ static bool ParseExprPrimary(State *state) { } // <number> or <float>, followed by 'E', as described above ParseExprPrimary. -static bool ParseExprCastValue(State *state) { +static bool ParseExprCastValueAndTrailingE(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; // We have to be able to backtrack after accepting a number because we could @@ -1848,39 +2692,148 @@ static bool ParseExprCastValue(State *state) { } state->parse_state = copy; - if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) { + if (ParseFloatNumber(state)) { + // <float> for ordinary floating-point types + if (ParseOneCharToken(state, 'E')) return true; + + // <float> _ <float> for complex floating-point types + if (ParseOneCharToken(state, '_') && ParseFloatNumber(state) && + ParseOneCharToken(state, 'E')) { + return true; + } + } + state->parse_state = copy; + + return false; +} + +// Parses `Q <requires-clause expr>`. +// If parsing fails, applies backtracking to `state`. +// +// This function covers two symbols instead of one for convenience, +// because in LLVM's Itanium ABI mangling grammar, <requires-clause expr> +// always appears after Q. +// +// Does not emit the parsed `requires` clause to simplify the implementation. +// In other words, these two functions' mangled names will demangle identically: +// +// template <typename T> +// int foo(T) requires IsIntegral<T>; +// +// vs. +// +// template <typename T> +// int foo(T); +static bool ParseQRequiresClauseExpr(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + DisableAppend(state); + + // <requires-clause expr> is just an <expression>: http://shortn/_9E1Ul0rIM8 + if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) { + RestoreAppend(state, copy.append); + return true; + } + + // also restores append + state->parse_state = copy; + return false; +} + +// <requirement> ::= X <expression> [N] [R <type-constraint>] +// <requirement> ::= T <type> +// <requirement> ::= Q <constraint-expression> +// +// <constraint-expression> ::= <expression> +// +// https://github.com/itanium-cxx-abi/cxx-abi/issues/24 +static bool ParseRequirement(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + + if (ParseOneCharToken(state, 'X') && ParseExpression(state) && + Optional(ParseOneCharToken(state, 'N')) && + // This logic backtracks cleanly if we eat an R but a valid type doesn't + // follow it. + (!ParseOneCharToken(state, 'R') || ParseTypeConstraint(state))) { return true; } state->parse_state = copy; + if (ParseOneCharToken(state, 'T') && ParseType(state)) return true; + state->parse_state = copy; + + if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) return true; + state->parse_state = copy; + return false; } +// <type-constraint> ::= <name> +static bool ParseTypeConstraint(State *state) { + return ParseName(state); +} + // <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>] // ::= Z <(function) encoding> E s [<discriminator>] +// ::= Z <(function) encoding> E d [<(parameter) number>] _ <name> // // Parsing a common prefix of these two productions together avoids an // exponential blowup of backtracking. Parse like: // <local-name> := Z <encoding> E <local-name-suffix> // <local-name-suffix> ::= s [<discriminator>] +// ::= d [<(parameter) number>] _ <name> // ::= <name> [<discriminator>] static bool ParseLocalNameSuffix(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + // <local-name-suffix> ::= d [<(parameter) number>] _ <name> + if (ParseOneCharToken(state, 'd') && + (IsDigit(RemainingInput(state)[0]) || RemainingInput(state)[0] == '_')) { + int number = -1; + Optional(ParseNumber(state, &number)); + if (number < -1 || number > 2147483645) { + // Work around overflow cases. We do not expect these outside of a fuzzer + // or other source of adversarial input. If we do detect overflow here, + // we'll print {default arg#1}. + number = -1; + } + number += 2; + + // The ::{default arg#1}:: infix must be rendered before the lambda itself, + // so print this before parsing the rest of the <local-name-suffix>. + MaybeAppend(state, "::{default arg#"); + MaybeAppendDecimal(state, number); + MaybeAppend(state, "}::"); + if (ParseOneCharToken(state, '_') && ParseName(state)) return true; + + // On late parse failure, roll back not only the input but also the output, + // whose trailing NUL was overwritten. + state->parse_state = copy; + if (state->parse_state.append) { + state->out[state->parse_state.out_cur_idx] = '\0'; + } + return false; + } + state->parse_state = copy; + // <local-name-suffix> ::= <name> [<discriminator>] if (MaybeAppend(state, "::") && ParseName(state) && Optional(ParseDiscriminator(state))) { return true; } - - // Since we're not going to overwrite the above "::" by re-parsing the - // <encoding> (whose trailing '\0' byte was in the byte now holding the - // first ':'), we have to rollback the "::" if the <name> parse failed. + state->parse_state = copy; if (state->parse_state.append) { - state->out[state->parse_state.out_cur_idx - 2] = '\0'; + state->out[state->parse_state.out_cur_idx] = '\0'; } + // <local-name-suffix> ::= s [<discriminator>] return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state)); } @@ -1896,12 +2849,22 @@ static bool ParseLocalName(State *state) { return false; } -// <discriminator> := _ <(non-negative) number> +// <discriminator> := _ <digit> +// := __ <number (>= 10)> _ static bool ParseDiscriminator(State *state) { ComplexityGuard guard(state); if (guard.IsTooComplex()) return false; ParseState copy = state->parse_state; - if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) { + + // Both forms start with _ so parse that first. + if (!ParseOneCharToken(state, '_')) return false; + + // <digit> + if (ParseDigit(state, nullptr)) return true; + + // _ <number> _ + if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) && + ParseOneCharToken(state, '_')) { return true; } state->parse_state = copy; @@ -1947,6 +2910,7 @@ static bool ParseSubstitution(State *state, bool accept_std) { MaybeAppend(state, p->real_name); } ++state->parse_state.mangled_idx; + UpdateHighWaterMark(state); return true; } } @@ -1972,10 +2936,13 @@ static bool ParseTopLevelMangledName(State *state) { MaybeAppend(state, RemainingInput(state)); return true; } + ReportHighWaterMark(state); return false; // Unconsumed suffix. } return true; } + + ReportHighWaterMark(state); return false; } @@ -1985,6 +2952,10 @@ static bool Overflowed(const State *state) { // The demangler entry point. bool Demangle(const char* mangled, char* out, size_t out_size) { + if (mangled[0] == '_' && mangled[1] == 'R') { + return DemangleRustSymbolEncoding(mangled, out, out_size); + } + State state; InitState(&state, mangled, out, out_size); return ParseTopLevelMangledName(&state) && !Overflowed(&state) && |