diff options
author | Samuel Benzaquen <sbenza@google.com> | 2022-11-29 08:57:48 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2022-11-29 08:58:23 -0800 |
commit | 82196f059f213c50738142a799bb166b2971950d (patch) | |
tree | 5dbfdb957de4369303797515ad34d3099aeeece7 /absl/strings/internal/str_format/checker.h | |
parent | 13708db87b1ab69f4f2b3214f3f51e986546f282 (diff) | |
download | abseil-82196f059f213c50738142a799bb166b2971950d.tar.gz abseil-82196f059f213c50738142a799bb166b2971950d.tar.bz2 abseil-82196f059f213c50738142a799bb166b2971950d.zip |
Convert the full parser into constexpr now that Abseil requires C++14, and use
this parser for the static checker.
This fixes some outstanding bugs where the static checker differed from the
dynamic one.
Also, fix `%v` to be accepted with POSIX syntax.
Tested:
Presubmit
TGP OCL:487237262:BASE:490275393:1669141454896:92dd62e3
PiperOrigin-RevId: 491650577
Change-Id: Id138c108187428b3aea46f8887495f1da12c91b2
Diffstat (limited to 'absl/strings/internal/str_format/checker.h')
-rw-r--r-- | absl/strings/internal/str_format/checker.h | 356 |
1 files changed, 41 insertions, 315 deletions
diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index aeb9d48d..eab6ab9d 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -15,8 +15,11 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#include <algorithm> + #include "absl/base/attributes.h" #include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" // Compile time check support for entry points. @@ -36,333 +39,56 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -constexpr bool AllOf() { return true; } - -template <typename... T> -constexpr bool AllOf(bool b, T... t) { - return b && AllOf(t...); -} - #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER -constexpr bool ContainsChar(const char* chars, char c) { - return *chars == c || (*chars && ContainsChar(chars + 1, c)); -} - -// A constexpr compatible list of Convs. -struct ConvList { - const FormatConversionCharSet* array; - int count; - - // We do the bound check here to avoid having to do it on the callers. - // Returning an empty FormatConversionCharSet has the same effect as - // short circuiting because it will never match any conversion. - constexpr FormatConversionCharSet operator[](int i) const { - return i < count ? array[i] : FormatConversionCharSet{}; - } - - constexpr ConvList without_front() const { - return count != 0 ? ConvList{array + 1, count - 1} : *this; - } -}; - -template <size_t count> -struct ConvListT { - // Make sure the array has size > 0. - FormatConversionCharSet list[count ? count : 1]; -}; - -constexpr char GetChar(string_view str, size_t index) { - return index < str.size() ? str[index] : char{}; -} - -constexpr string_view ConsumeFront(string_view str, size_t len = 1) { - return len <= str.size() ? string_view(str.data() + len, str.size() - len) - : string_view(); -} - -constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { - while (ContainsChar(chars, GetChar(format, 0))) { - format = ConsumeFront(format); - } - return format; -} - -constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } - -// Helper class for the ParseDigits function. -// It encapsulates the two return values we need there. -struct Integer { - string_view format; - int value; - - // If the next character is a '$', consume it. - // Otherwise, make `this` an invalid positional argument. - constexpr Integer ConsumePositionalDollar() const { - if (GetChar(format, 0) == '$') { - return Integer{ConsumeFront(format), value}; - } else { - return Integer{format, 0}; - } - } -}; - -constexpr Integer ParseDigits(string_view format) { - int value = 0; - while (IsDigit(GetChar(format, 0))) { - value = 10 * value + GetChar(format, 0) - '0'; - format = ConsumeFront(format); - } - - return Integer{format, value}; -} - -// Parse digits for a positional argument. -// The parsing also consumes the '$'. -constexpr Integer ParsePositional(string_view format) { - return ParseDigits(format).ConsumePositionalDollar(); -} - -// Parses a single conversion specifier. -// See ConvParser::Run() for post conditions. -class ConvParser { - constexpr ConvParser SetFormat(string_view format) const { - return ConvParser(format, args_, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetArgs(ConvList args) const { - return ConvParser(format_, args, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetError(bool error) const { - return ConvParser(format_, args_, error_ || error, arg_position_, - is_positional_); - } - - constexpr ConvParser SetArgPosition(int arg_position) const { - return ConvParser(format_, args_, error_, arg_position, is_positional_); - } - - // Consumes the next arg and verifies that it matches `conv`. - // `error_` is set if there is no next arg or if it doesn't match `conv`. - constexpr ConvParser ConsumeNextArg(char conv) const { - return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); - } - - // Verify that positional argument `i.value` matches `conv`. - // `error_` is set if `i.value` is not a valid argument or if it doesn't - // match. - constexpr ConvParser VerifyPositional(Integer i, char conv) const { - return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); - } - - // Parse the position of the arg and store it in `arg_position_`. - constexpr ConvParser ParseArgPosition(Integer arg) const { - return SetFormat(arg.format).SetArgPosition(arg.value); - } - - // Consume the flags. - constexpr ConvParser ParseFlags() const { - return SetFormat(ConsumeAnyOf(format_, "-+ #0")); - } - - // Consume the width. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParseWidth() const { - char first_char = GetChar(format_, 0); - - if (IsDigit(first_char)) { - return SetFormat(ParseDigits(format_).format); - } else if (first_char == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_)), '*'); - } else { - return SetFormat(ConsumeFront(format_)).ConsumeNextArg('*'); - } - } else { - return *this; +template <FormatConversionCharSet... C> +constexpr bool ValidFormatImpl(string_view format) { + int next_arg = 0; + const char* p = format.data(); + const char* const end = p + format.size(); + constexpr FormatConversionCharSet + kAllowedConvs[(std::max)(sizeof...(C), size_t{1})] = {C...}; + bool used[(std::max)(sizeof...(C), size_t{1})]{}; + constexpr int kNumArgs = sizeof...(C); + while (p != end) { + while (p != end && *p != '%') ++p; + if (p == end) { + break; } - } - - // Consume the precision. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParsePrecision() const { - if (GetChar(format_, 0) != '.') { - return *this; - } else if (GetChar(format_, 1) == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_, 2)), '*'); - } else { - return SetFormat(ConsumeFront(format_, 2)).ConsumeNextArg('*'); - } - } else { - return SetFormat(ParseDigits(ConsumeFront(format_)).format); + if (p + 1 >= end) return false; + if (p[1] == '%') { + // %% + p += 2; + continue; } - } - - // Consume the length characters. - constexpr ConvParser ParseLength() const { - return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); - } - - // Consume the conversion character and verify that it matches `args_`. - // `error_` is set if it doesn't match. - constexpr ConvParser ParseConversion() const { - char first_char = GetChar(format_, 0); - if (first_char == 'v' && *(format_.data() - 1) != '%') { - return SetError(true); + UnboundConversion conv(absl::kConstInit); + p = ConsumeUnboundConversion(p + 1, end, &conv, &next_arg); + if (p == nullptr) return false; + if (conv.arg_position <= 0 || conv.arg_position > kNumArgs) { + return false; } - - if (is_positional_) { - return VerifyPositional({ConsumeFront(format_), arg_position_}, - first_char); - } else { - return ConsumeNextArg(first_char).SetFormat(ConsumeFront(format_)); + if (!Contains(kAllowedConvs[conv.arg_position - 1], conv.conv)) { + return false; } - } - - constexpr ConvParser(string_view format, ConvList args, bool error, - int arg_position, bool is_positional) - : format_(format), - args_(args), - error_(error), - arg_position_(arg_position), - is_positional_(is_positional) {} - - public: - constexpr ConvParser(string_view format, ConvList args, bool is_positional) - : format_(format), - args_(args), - error_(false), - arg_position_(0), - is_positional_(is_positional) {} - - // Consume the whole conversion specifier. - // `format()` will be set to the character after the conversion character. - // `error()` will be set if any of the arguments do not match. - constexpr ConvParser Run() const { - ConvParser parser = *this; - - if (is_positional_) { - parser = ParseArgPosition(ParsePositional(format_)); - } - - return parser.ParseFlags() - .ParseWidth() - .ParsePrecision() - .ParseLength() - .ParseConversion(); - } - - constexpr string_view format() const { return format_; } - constexpr ConvList args() const { return args_; } - constexpr bool error() const { return error_; } - constexpr bool is_positional() const { return is_positional_; } - - private: - string_view format_; - // Current list of arguments. If we are not in positional mode we will consume - // from the front. - ConvList args_; - bool error_; - // Holds the argument position of the conversion character, if we are in - // positional mode. Otherwise, it is unspecified. - int arg_position_; - // Whether we are in positional mode. - // It changes the behavior of '*' and where to find the converted argument. - bool is_positional_; -}; - -// Parses a whole format expression. -// See FormatParser::Run(). -class FormatParser { - static constexpr bool FoundPercent(string_view format) { - return format.empty() || - (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); - } - - // We use an inner function to increase the recursion limit. - // The inner function consumes up to `limit` characters on every run. - // This increases the limit from 512 to ~512*limit. - static constexpr string_view ConsumeNonPercentInner(string_view format) { - int limit = 20; - while (!FoundPercent(format) && limit != 0) { - size_t len = 0; - - if (GetChar(format, 0) == '%' && GetChar(format, 1) == '%') { - len = 2; - } else { - len = 1; + used[conv.arg_position - 1] = true; + for (auto extra : {conv.width, conv.precision}) { + if (extra.is_from_arg()) { + int pos = extra.get_from_arg(); + if (pos <= 0 || pos > kNumArgs) return false; + used[pos - 1] = true; + if (!Contains(kAllowedConvs[pos - 1], '*')) { + return false; + } } - - format = ConsumeFront(format, len); - --limit; } - - return format; } - - // Consume characters until the next conversion spec %. - // It skips %%. - static constexpr string_view ConsumeNonPercent(string_view format) { - while (!FoundPercent(format)) { - format = ConsumeNonPercentInner(format); + if (sizeof...(C) != 0) { + for (bool b : used) { + if (!b) return false; } - - return format; - } - - static constexpr bool IsPositional(string_view format) { - while (IsDigit(GetChar(format, 0))) { - format = ConsumeFront(format); - } - - return GetChar(format, 0) == '$'; } - - constexpr bool RunImpl(bool is_positional) const { - // In non-positional mode we require all arguments to be consumed. - // In positional mode just reaching the end of the format without errors is - // enough. - return (format_.empty() && (is_positional || args_.count == 0)) || - (!format_.empty() && - ValidateArg( - ConvParser(ConsumeFront(format_), args_, is_positional).Run())); - } - - constexpr bool ValidateArg(ConvParser conv) const { - return !conv.error() && FormatParser(conv.format(), conv.args()) - .RunImpl(conv.is_positional()); - } - - public: - constexpr FormatParser(string_view format, ConvList args) - : format_(ConsumeNonPercent(format)), args_(args) {} - - // Runs the parser for `format` and `args`. - // It verifies that the format is valid and that all conversion specifiers - // match the arguments passed. - // In non-positional mode it also verfies that all arguments are consumed. - constexpr bool Run() const { - return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); - } - - private: - string_view format_; - // Current list of arguments. - // If we are not in positional mode we will consume from the front and will - // have to be empty in the end. - ConvList args_; -}; - -template <FormatConversionCharSet... C> -constexpr bool ValidFormatImpl(string_view format) { - return FormatParser(format, - {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) - .Run(); + return true; } #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER |