diff options
Diffstat (limited to 'absl/strings')
37 files changed, 1194 insertions, 847 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 809f566d..d86f59f0 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -43,6 +43,7 @@ cc_library( "internal/stl_type_traits.h", "internal/str_join_internal.h", "internal/str_split_internal.h", + "internal/stringify_sink.cc", "internal/stringify_sink.h", "match.cc", "numbers.cc", @@ -57,6 +58,7 @@ cc_library( "charconv.h", "escaping.h", "internal/damerau_levenshtein_distance.h", + "internal/has_absl_stringify.h", "internal/string_constant.h", "match.h", "numbers.h", @@ -322,6 +324,7 @@ cc_library( "//absl/container:compressed_tuple", "//absl/container:inlined_vector", "//absl/container:layout", + "//absl/crc:crc_cord_state", "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/types:span", @@ -403,6 +406,7 @@ cc_test( ":cord_internal", ":cord_rep_test_util", "//absl/base:config", + "//absl/crc:crc_cord_state", "@com_google_googletest//:gtest_main", ], ) @@ -461,6 +465,7 @@ cc_library( "//absl/base:raw_logging_internal", "//absl/container:fixed_array", "//absl/container:inlined_vector", + "//absl/crc:crc_cord_state", "//absl/functional:function_ref", "//absl/meta:type_traits", "//absl/numeric:bits", @@ -657,6 +662,7 @@ cc_test( ":cordz_update_scope", ":cordz_update_tracker", "//absl/base:config", + "//absl/crc:crc_cord_state", "//absl/synchronization", "//absl/synchronization:thread_pool", "@com_google_googletest//:gtest_main", @@ -1148,6 +1154,7 @@ cc_library( "internal/str_format/arg.h", "internal/str_format/bind.h", "internal/str_format/checker.h", + "internal/str_format/constexpr_parser.h", "internal/str_format/extension.h", "internal/str_format/float_conversion.h", "internal/str_format/output.h", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 30dedcf5..a23b34a1 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -23,6 +23,7 @@ absl_cc_library( "escaping.h" "internal/damerau_levenshtein_distance.h" "internal/string_constant.h" + "internal/has_absl_stringify.h" "match.h" "numbers.h" "str_cat.h" @@ -44,6 +45,7 @@ absl_cc_library( "internal/memutil.cc" "internal/memutil.h" "internal/stringify_sink.h" + "internal/stringify_sink.cc" "internal/stl_type_traits.h" "internal/str_join_internal.h" "internal/str_split_internal.h" @@ -411,6 +413,7 @@ absl_cc_library( "internal/str_format/arg.h" "internal/str_format/bind.h" "internal/str_format/checker.h" + "internal/str_format/constexpr_parser.h" "internal/str_format/extension.h" "internal/str_format/float_conversion.h" "internal/str_format/output.h" @@ -602,6 +605,7 @@ absl_cc_library( absl::compressed_tuple absl::config absl::core_headers + absl::crc_cord_state absl::endian absl::inlined_vector absl::layout @@ -780,6 +784,7 @@ absl_cc_test( absl::cordz_statistics absl::cordz_update_scope absl::cordz_update_tracker + absl::crc_cord_state absl::thread_pool GTest::gmock_main ) @@ -879,6 +884,7 @@ absl_cc_library( absl::cordz_update_scope absl::cordz_update_tracker absl::core_headers + absl::crc_cord_state absl::endian absl::fixed_array absl::function_ref @@ -1051,6 +1057,7 @@ absl_cc_test( absl::config absl::cord_internal absl::cord_rep_test_util + absl::crc_cord_state GTest::gmock_main ) diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 66f45fef..0bac4164 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -35,6 +35,7 @@ #include "absl/base/port.h" #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/cord_buffer.h" #include "absl/strings/escaping.h" #include "absl/strings/internal/cord_data_edge.h" @@ -420,6 +421,7 @@ Cord& Cord::operator=(absl::string_view src) { // we keep it here to make diffs easier. void Cord::InlineRep::AppendArray(absl::string_view src, MethodIdentifier method) { + MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. size_t appended = 0; @@ -479,6 +481,10 @@ inline CordRep* Cord::TakeRep() && { template <typename C> inline void Cord::AppendImpl(C&& src) { auto constexpr method = CordzUpdateTracker::kAppendCord; + + contents_.MaybeRemoveEmptyCrcNode(); + if (src.empty()) return; + if (empty()) { // Since destination is empty, we can avoid allocating a node, if (src.contents_.is_tree()) { @@ -591,6 +597,9 @@ void Cord::Append(T&& src) { template void Cord::Append(std::string&& src); void Cord::Prepend(const Cord& src) { + contents_.MaybeRemoveEmptyCrcNode(); + if (src.empty()) return; + CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { CordRep::Ref(src_tree); @@ -605,7 +614,9 @@ void Cord::Prepend(const Cord& src) { } void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { + contents_.MaybeRemoveEmptyCrcNode(); if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + if (!contents_.is_tree()) { size_t cur_size = contents_.inline_size(); if (cur_size + src.size() <= InlineRep::kMaxInline) { @@ -665,6 +676,7 @@ void Cord::RemovePrefix(size_t n) { ABSL_INTERNAL_CHECK(n <= size(), absl::StrCat("Requested prefix size ", n, " exceeds Cord's size ", size())); + contents_.MaybeRemoveEmptyCrcNode(); CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.remove_prefix(n); @@ -695,6 +707,7 @@ void Cord::RemoveSuffix(size_t n) { ABSL_INTERNAL_CHECK(n <= size(), absl::StrCat("Requested suffix size ", n, " exceeds Cord's size ", size())); + contents_.MaybeRemoveEmptyCrcNode(); CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.reduce_size(n); @@ -842,26 +855,44 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { return absl::string_view(node->external()->base + offset, length); } -void Cord::SetExpectedChecksum(uint32_t crc) { +void Cord::SetCrcCordState(crc_internal::CrcCordState state) { auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum; - if (empty()) return; - - if (!contents_.is_tree()) { + if (empty()) { + contents_.MaybeRemoveEmptyCrcNode(); + CordRep* rep = CordRepCrc::New(nullptr, std::move(state)); + contents_.EmplaceTree(rep, method); + } else if (!contents_.is_tree()) { CordRep* rep = contents_.MakeFlatWithExtraCapacity(0); - rep = CordRepCrc::New(rep, crc); + rep = CordRepCrc::New(rep, std::move(state)); contents_.EmplaceTree(rep, method); } else { const CordzUpdateScope scope(contents_.data_.cordz_info(), method); - CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), crc); + CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), std::move(state)); contents_.SetTree(rep, scope); } } +void Cord::SetExpectedChecksum(uint32_t crc) { + // Construct a CrcCordState with a single chunk. + crc_internal::CrcCordState state; + state.mutable_rep()->prefix_crc.push_back( + crc_internal::CrcCordState::PrefixCrc(size(), absl::crc32c_t{crc})); + SetCrcCordState(std::move(state)); +} + +const crc_internal::CrcCordState* Cord::MaybeGetCrcCordState() const { + if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { + return nullptr; + } + return &contents_.tree()->crc()->crc_cord_state; +} + absl::optional<uint32_t> Cord::ExpectedChecksum() const { if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { return absl::nullopt; } - return contents_.tree()->crc()->crc; + return static_cast<uint32_t>( + contents_.tree()->crc()->crc_cord_state.Checksum()); } inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, @@ -929,6 +960,7 @@ inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, } inline absl::string_view Cord::GetFirstChunk(const Cord& c) { + if (c.empty()) return {}; return c.contents_.FindFlatStartPiece(); } inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { @@ -1166,6 +1198,10 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { assert(rep != nullptr); + if (rep->length == 0) { + *fragment = absl::string_view(); + return true; + } rep = cord_internal::SkipCrcNode(rep); if (rep->IsFlat()) { *fragment = absl::string_view(rep->flat()->Data(), rep->length); @@ -1197,6 +1233,7 @@ absl::string_view Cord::FlattenSlowPath() { absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback) { assert(rep != nullptr); + if (rep->length == 0) return; rep = cord_internal::SkipCrcNode(rep); if (rep->IsBtree()) { @@ -1230,8 +1267,12 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, if (include_data) *os << static_cast<void*>(rep); *os << "]"; *os << " " << std::setw(indent) << ""; - if (rep->IsCrc()) { - *os << "CRC crc=" << rep->crc()->crc << "\n"; + bool leaf = false; + if (rep == nullptr) { + *os << "NULL\n"; + leaf = true; + } else if (rep->IsCrc()) { + *os << "CRC crc=" << rep->crc()->crc_cord_state.Checksum() << "\n"; indent += kIndentStep; rep = rep->crc()->child; } else if (rep->IsSubstring()) { @@ -1239,6 +1280,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, indent += kIndentStep; rep = rep->substring()->child; } else { // Leaf or ring + leaf = true; if (rep->IsExternal()) { *os << "EXTERNAL ["; if (include_data) @@ -1252,6 +1294,8 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, } else { CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os); } + } + if (leaf) { if (stack.empty()) break; rep = stack.back(); stack.pop_back(); @@ -1297,11 +1341,14 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, node->substring()->child->length, ReportError(root, node)); } else if (node->IsCrc()) { - ABSL_INTERNAL_CHECK(node->crc()->child != nullptr, - ReportError(root, node)); - ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, - ReportError(root, node)); - worklist.push_back(node->crc()->child); + ABSL_INTERNAL_CHECK( + node->crc()->child != nullptr || node->crc()->length == 0, + ReportError(root, node)); + if (node->crc()->child != nullptr) { + ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, + ReportError(root, node)); + worklist.push_back(node->crc()->child); + } } } while (!worklist.empty()); return true; diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 88e1c85d..1349b115 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -76,6 +76,7 @@ #include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/container/inlined_vector.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" #include "absl/strings/cord_analysis.h" @@ -926,6 +927,13 @@ class Cord { void set_inline_size(size_t size) { data_.set_inline_size(size); } size_t inline_size() const { return data_.inline_size(); } + // Empty cords that carry a checksum have a CordRepCrc node with a null + // child node. The code can avoid lots of special cases where it would + // otherwise transition from tree to inline storage if we just remove the + // CordRepCrc node before mutations. Must never be called inside a + // CordzUpdateScope since it untracks the cordz info. + void MaybeRemoveEmptyCrcNode(); + cord_internal::InlineData data_; }; InlineRep contents_; @@ -995,6 +1003,10 @@ class Cord { }); return H::combine(combiner.finalize(std::move(hash_state)), size()); } + + friend class CrcCord; + void SetCrcCordState(crc_internal::CrcCordState state); + const crc_internal::CrcCordState* MaybeGetCrcCordState() const; }; ABSL_NAMESPACE_END @@ -1236,6 +1248,18 @@ inline void Cord::InlineRep::CopyToArray(char* dst) const { cord_internal::SmallMemmove(dst, data_.as_chars(), n); } +inline void Cord::InlineRep::MaybeRemoveEmptyCrcNode() { + CordRep* rep = tree(); + if (rep == nullptr || ABSL_PREDICT_TRUE(rep->length > 0)) { + return; + } + assert(rep->IsCrc()); + assert(rep->crc()->child == nullptr); + CordzInfo::MaybeUntrackCord(cordz_info()); + CordRep::Unref(rep); + ResetToEmpty(); +} + constexpr inline Cord::Cord() noexcept {} inline Cord::Cord(absl::string_view src) @@ -1285,7 +1309,7 @@ inline size_t Cord::size() const { return contents_.size(); } -inline bool Cord::empty() const { return contents_.empty(); } +inline bool Cord::empty() const { return size() == 0; } inline size_t Cord::EstimatedMemoryUsage( CordMemoryAccounting accounting_method) const { @@ -1411,7 +1435,11 @@ inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) { inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) { if (CordRep* tree = cord->contents_.tree()) { bytes_remaining_ = tree->length; - InitTree(tree); + if (ABSL_PREDICT_TRUE(bytes_remaining_ != 0)) { + InitTree(tree); + } else { + current_chunk_ = {}; + } } else { bytes_remaining_ = cord->contents_.inline_size(); current_chunk_ = {cord->contents_.data(), bytes_remaining_}; @@ -1580,7 +1608,7 @@ inline void Cord::ForEachChunk( if (rep == nullptr) { callback(absl::string_view(contents_.data(), contents_.size())); } else { - return ForEachChunkAux(rep, callback); + ForEachChunkAux(rep, callback); } } diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index 1fc4be6e..a4fa8955 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -1988,6 +1988,12 @@ TEST_P(CordTest, HugeCord) { // Tests that Append() works ok when handed a self reference TEST_P(CordTest, AppendSelf) { + // Test the empty case. + absl::Cord empty; + MaybeHarden(empty); + empty.Append(empty); + ASSERT_EQ(empty, ""); + // We run the test until data is ~16K // This guarantees it covers small, medium and large data. std::string control_data = "Abc"; @@ -2712,7 +2718,7 @@ class CordMutator { // clang-format off // This array is constant-initialized in conformant compilers. -CordMutator cord_mutators[] ={ +CordMutator cord_mutators[] = { {"clear", [](absl::Cord& c) { c.Clear(); }}, {"overwrite", [](absl::Cord& c) { c = "overwritten"; }}, { @@ -2742,6 +2748,25 @@ CordMutator cord_mutators[] ={ [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); } }, { + "append empty string", + [](absl::Cord& c) { c.Append(""); }, + [](absl::Cord& c) { } + }, + { + "append empty cord", + [](absl::Cord& c) { c.Append(absl::Cord()); }, + [](absl::Cord& c) { } + }, + { + "append empty checksummed cord", + [](absl::Cord& c) { + absl::Cord to_append; + to_append.SetExpectedChecksum(999); + c.Append(to_append); + }, + [](absl::Cord& c) { } + }, + { "prepend string", [](absl::Cord& c) { c.Prepend("9876543210"); }, [](absl::Cord& c) { c.RemovePrefix(10); } @@ -2763,12 +2788,33 @@ CordMutator cord_mutators[] ={ [](absl::Cord& c) { c.RemovePrefix(10); } }, { + "prepend empty string", + [](absl::Cord& c) { c.Prepend(""); }, + [](absl::Cord& c) { } + }, + { + "prepend empty cord", + [](absl::Cord& c) { c.Prepend(absl::Cord()); }, + [](absl::Cord& c) { } + }, + { + "prepend empty checksummed cord", + [](absl::Cord& c) { + absl::Cord to_prepend; + to_prepend.SetExpectedChecksum(999); + c.Prepend(to_prepend); + }, + [](absl::Cord& c) { } + }, + { "prepend self", [](absl::Cord& c) { c.Prepend(c); }, [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); } }, - {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }}, - {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }}, + {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }}, + {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }}, + {"remove 0-prefix", [](absl::Cord& c) { c.RemovePrefix(0); }}, + {"remove 0-suffix", [](absl::Cord& c) { c.RemoveSuffix(0); }}, {"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }}, { "swap inline", @@ -2810,6 +2856,12 @@ TEST_P(CordTest, ExpectedChecksum) { EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); EXPECT_EQ(c1, base_value); + // Test that setting an expected checksum again doesn't crash or leak + // memory. + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, base_value); + // CRC persists through copies, assignments, and moves: absl::Cord c1_copy_construct = c1; EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345); @@ -2834,6 +2886,13 @@ TEST_P(CordTest, ExpectedChecksum) { c2.SetExpectedChecksum(24680); mutator.Mutate(c2); + + if (c1 == c2) { + // Not a mutation (for example, appending the empty string). + // Whether the checksum is removed is not defined. + continue; + } + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); if (mutator.CanUndo()) { @@ -2903,3 +2962,98 @@ TEST_P(CordTest, ExpectedChecksum) { } } } + +// Test the special cases encountered with an empty checksummed cord. +TEST_P(CordTest, ChecksummedEmptyCord) { + absl::Cord c1; + EXPECT_FALSE(c1.ExpectedChecksum().has_value()); + + // Setting an expected checksum works. + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, ""); + EXPECT_TRUE(c1.empty()); + + // Test that setting an expected checksum again doesn't crash or leak memory. + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, ""); + EXPECT_TRUE(c1.empty()); + + // CRC persists through copies, assignments, and moves: + absl::Cord c1_copy_construct = c1; + EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_copy_assign; + c1_copy_assign = c1; + EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_move(std::move(c1_copy_assign)); + EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345); + + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + + // A CRC Cord compares equal to its non-CRC value. + EXPECT_EQ(c1, absl::Cord()); + + for (const CordMutator& mutator : cord_mutators) { + SCOPED_TRACE(mutator.Name()); + + // Exercise mutating an empty checksummed cord to catch crashes and exercise + // memory sanitizers. + absl::Cord c2; + c2.SetExpectedChecksum(24680); + mutator.Mutate(c2); + + if (c2.empty()) { + // Not a mutation + continue; + } + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); + + if (mutator.CanUndo()) { + mutator.Undo(c2); + } + } + + absl::Cord c3; + c3.SetExpectedChecksum(999); + const absl::Cord& cc3 = c3; + + // Test that all cord reading operations function in the face of an + // expected checksum. + EXPECT_TRUE(cc3.StartsWith("")); + EXPECT_TRUE(cc3.EndsWith("")); + EXPECT_TRUE(cc3.empty()); + EXPECT_EQ(cc3, ""); + EXPECT_EQ(cc3, absl::Cord()); + EXPECT_EQ(cc3.size(), 0); + EXPECT_EQ(cc3.Compare(absl::Cord()), 0); + EXPECT_EQ(cc3.Compare(c1), 0); + EXPECT_EQ(cc3.Compare(cc3), 0); + EXPECT_EQ(cc3.Compare(""), 0); + EXPECT_EQ(cc3.Compare("wxyz"), -1); + EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1); + EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1); + EXPECT_EQ(std::string(cc3), ""); + + std::string dest; + absl::CopyCordToString(cc3, &dest); + EXPECT_EQ(dest, ""); + + for (absl::string_view chunk : cc3.Chunks()) { // NOLINT(unreachable loop) + static_cast<void>(chunk); + GTEST_FAIL() << "no chunks expected"; + } + EXPECT_TRUE(cc3.chunk_begin() == cc3.chunk_end()); + + for (char ch : cc3.Chars()) { // NOLINT(unreachable loop) + static_cast<void>(ch); + GTEST_FAIL() << "no chars expected"; + } + EXPECT_TRUE(cc3.char_begin() == cc3.char_end()); + + EXPECT_EQ(cc3.TryFlat(), ""); + EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord())); + EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view())); +} diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 7d97944e..93966846 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -784,9 +784,7 @@ template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { // Determine the size of the output string. Base64 encodes every 3 bytes into - // 4 characters. any leftover chars are added directly for good measure. - // This is documented in the base64 RFC: - // https://datatracker.ietf.org/doc/html/rfc3548 + // 4 characters. Any leftover chars are added directly for good measure. const size_t dest_len = 3 * (slen / 4) + (slen % 4); strings_internal::STLStringResizeUninitialized(dest, dest_len); diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h index f5ca26c5..d2c18e02 100644 --- a/absl/strings/escaping.h +++ b/absl/strings/escaping.h @@ -117,11 +117,29 @@ std::string Utf8SafeCEscape(absl::string_view src); // conversion. std::string Utf8SafeCHexEscape(absl::string_view src); +// Base64Escape() +// +// Encodes a `src` string into a base64-encoded 'dest' string with padding +// characters. This function conforms with RFC 4648 section 4 (base64) and RFC +// 2045. See also CalculateBase64EscapedLen(). +void Base64Escape(absl::string_view src, std::string* dest); +std::string Base64Escape(absl::string_view src); + +// WebSafeBase64Escape() +// +// Encodes a `src` string into a base64 string, like Base64Escape() does, but +// outputs '-' instead of '+' and '_' instead of '/', and does not pad 'dest'. +// This function conforms with RFC 4648 section 5 (base64url). +void WebSafeBase64Escape(absl::string_view src, std::string* dest); +std::string WebSafeBase64Escape(absl::string_view src); + // Base64Unescape() // // Converts a `src` string encoded in Base64 to its binary equivalent, writing // it to a `dest` buffer, returning `true` on success. If `src` contains invalid // characters, `dest` is cleared and returns `false`. +// Padding is optional. If padding is included, it must be correct. In the +// padding, '=' and '.' are treated identically. bool Base64Unescape(absl::string_view src, std::string* dest); // WebSafeBase64Unescape() @@ -129,23 +147,10 @@ bool Base64Unescape(absl::string_view src, std::string* dest); // Converts a `src` string encoded in Base64 to its binary equivalent, writing // it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'. // If `src` contains invalid characters, `dest` is cleared and returns `false`. +// Padding is optional. If padding is included, it must be correct. In the +// padding, '=' and '.' are treated identically. bool WebSafeBase64Unescape(absl::string_view src, std::string* dest); -// Base64Escape() -// -// Encodes a `src` string into a base64-encoded string, with padding characters. -// This function conforms with RFC 4648 section 4 (base64). -void Base64Escape(absl::string_view src, std::string* dest); -std::string Base64Escape(absl::string_view src); - -// WebSafeBase64Escape() -// -// Encodes a `src` string into a base64-like string, using '-' instead of '+' -// and '_' instead of '/', and without padding. This function conforms with RFC -// 4648 section 5 (base64url). -void WebSafeBase64Escape(absl::string_view src, std::string* dest); -std::string WebSafeBase64Escape(absl::string_view src); - // HexStringToBytes() // // Converts an ASCII hex string into bytes, returning binary data of length diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc index 45671a0e..44ffcba7 100644 --- a/absl/strings/escaping_test.cc +++ b/absl/strings/escaping_test.cc @@ -617,6 +617,48 @@ TEST(Base64, EscapeAndUnescape) { TestEscapeAndUnescape<std::string>(); } +TEST(Base64, Padding) { + // Padding is optional. + // '.' is an acceptable padding character, just like '='. + std::initializer_list<absl::string_view> good_padding = { + "YQ", + "YQ==", + "YQ=.", + "YQ.=", + "YQ..", + }; + for (absl::string_view b64 : good_padding) { + std::string decoded; + EXPECT_TRUE(absl::Base64Unescape(b64, &decoded)); + EXPECT_EQ(decoded, "a"); + std::string websafe_decoded; + EXPECT_TRUE(absl::WebSafeBase64Unescape(b64, &websafe_decoded)); + EXPECT_EQ(websafe_decoded, "a"); + } + std::initializer_list<absl::string_view> bad_padding = { + "YQ=", + "YQ.", + "YQ===", + "YQ==.", + "YQ=.=", + "YQ=..", + "YQ.==", + "YQ.=.", + "YQ..=", + "YQ...", + "YQ====", + "YQ....", + "YQ=====", + "YQ.....", + }; + for (absl::string_view b64 : bad_padding) { + std::string decoded; + EXPECT_FALSE(absl::Base64Unescape(b64, &decoded)); + std::string websafe_decoded; + EXPECT_FALSE(absl::WebSafeBase64Unescape(b64, &websafe_decoded)); + } +} + TEST(Base64, DISABLED_HugeData) { const size_t kSize = size_t(3) * 1000 * 1000 * 1000; static_assert(kSize % 3 == 0, "kSize must be divisible by 3"); diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h index 5aabc1fc..70a90343 100644 --- a/absl/strings/internal/char_map.h +++ b/absl/strings/internal/char_map.h @@ -73,10 +73,10 @@ class Charmap { } // Containing all the chars in the C-string 's'. - // Note that this is expensively recursive because of the C++11 constexpr - // formulation. Use only in constexpr initializers. static constexpr Charmap FromString(const char* s) { - return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + Charmap ret; + while (*s) ret = ret | Char(*s++); + return ret; } // Containing all the chars in the closed interval [lo,hi]. diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index fcca3a28..6022c1df 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -225,7 +225,11 @@ struct CordRep { : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} // The following three fields have to be less than 32 bytes since - // that is the smallest supported flat node size. + // that is the smallest supported flat node size. Some code optimizations rely + // on the specific layout of these fields. Notably: the non-trivial field + // `refcount` being preceeded by `length`, and being tailed by POD data + // members only. + // # LINT.IfChange size_t length; RefcountAndFlags refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. @@ -241,6 +245,7 @@ struct CordRep { // allocate room for these in the derived class, as not all compilers reuse // padding space from the base class (clang and gcc do, MSVC does not, etc) uint8_t storage[3]; + // # LINT.ThenChange(cord_rep_btree.h:copy_raw) // Returns true if this instance's tag matches the requested type. constexpr bool IsRing() const { return tag == RING; } diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 7ce36128..985f0724 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -502,7 +502,7 @@ OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { // open interval [begin, back) or [begin + 1, end) depending on `edge_type`. // We conveniently cover both case using a constexpr `shift` being 0 or 1 // as `end :== back + 1`. - result = {CopyRaw(), kCopied}; + result = {CopyRaw(length), kCopied}; constexpr int shift = edge_type == kFront ? 1 : 0; for (CordRep* r : Edges(begin() + shift, back() + shift)) { CordRep::Ref(r); diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index eed5609e..4209e512 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -446,9 +446,9 @@ class CordRepBtree : public CordRep { template <EdgeType edge_type> static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); - // Creates a raw copy of this Btree node, copying all properties, but - // without adding any references to existing edges. - CordRepBtree* CopyRaw() const; + // Creates a raw copy of this Btree node with the specified length, copying + // all properties, but without adding any references to existing edges. + CordRepBtree* CopyRaw(size_t new_length) const; // Creates a full copy of this Btree node, adding a reference on all edges. CordRepBtree* Copy() const; @@ -666,15 +666,28 @@ inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { } } -inline CordRepBtree* CordRepBtree::CopyRaw() const { - auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); - memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); - new (&tree->refcount) RefcountAndFlags; +inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const { + CordRepBtree* tree = new CordRepBtree; + + // `length` and `refcount` are the first members of `CordRepBtree`. + // We initialize `length` using the given length, have `refcount` be set to + // ref = 1 through its default constructor, and copy all data beyond + // 'refcount' which starts with `tag` using a single memcpy: all contents + // except `refcount` is trivially copyable, and the compiler does not + // efficiently coalesce member-wise copy of these members. + // See https://gcc.godbolt.org/z/qY8zsca6z + // # LINT.IfChange(copy_raw) + tree->length = new_length; + uint8_t* dst = &tree->tag; + const uint8_t* src = &tag; + const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this); + memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset)); return tree; + // # LINT.ThenChange() } inline CordRepBtree* CordRepBtree::Copy() const { - CordRepBtree* tree = CopyRaw(); + CordRepBtree* tree = CopyRaw(length); for (CordRep* rep : Edges()) CordRep::Ref(rep); return tree; } @@ -683,8 +696,7 @@ inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, size_t new_length) const { assert(begin >= this->begin()); assert(begin <= this->end()); - CordRepBtree* tree = CopyRaw(); - tree->length = new_length; + CordRepBtree* tree = CopyRaw(new_length); tree->set_begin(begin); for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); return tree; @@ -694,8 +706,7 @@ inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, size_t new_length) const { assert(end <= capacity()); assert(end >= this->begin()); - CordRepBtree* tree = CopyRaw(); - tree->length = new_length; + CordRepBtree* tree = CopyRaw(new_length); tree->set_end(end); for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); return tree; diff --git a/absl/strings/internal/cord_rep_crc.cc b/absl/strings/internal/cord_rep_crc.cc index ee140354..dbe54cc4 100644 --- a/absl/strings/internal/cord_rep_crc.cc +++ b/absl/strings/internal/cord_rep_crc.cc @@ -16,6 +16,7 @@ #include <cassert> #include <cstdint> +#include <utility> #include "absl/base/config.h" #include "absl/strings/internal/cord_internal.h" @@ -24,11 +25,10 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { - assert(child != nullptr); - if (child->IsCrc()) { +CordRepCrc* CordRepCrc::New(CordRep* child, crc_internal::CrcCordState state) { + if (child != nullptr && child->IsCrc()) { if (child->refcount.IsOne()) { - child->crc()->crc = crc; + child->crc()->crc_cord_state = std::move(state); return child->crc(); } CordRep* old = child; @@ -37,15 +37,17 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { CordRep::Unref(old); } auto* new_cordrep = new CordRepCrc; - new_cordrep->length = child->length; + new_cordrep->length = child != nullptr ? child->length : 0; new_cordrep->tag = cord_internal::CRC; new_cordrep->child = child; - new_cordrep->crc = crc; + new_cordrep->crc_cord_state = std::move(state); return new_cordrep; } void CordRepCrc::Destroy(CordRepCrc* node) { - CordRep::Unref(node->child); + if (node->child != nullptr) { + CordRep::Unref(node->child); + } delete node; } diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h index 5294b0d1..379d7a60 100644 --- a/absl/strings/internal/cord_rep_crc.h +++ b/absl/strings/internal/cord_rep_crc.h @@ -20,6 +20,7 @@ #include "absl/base/config.h" #include "absl/base/optimization.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/internal/cord_internal.h" namespace absl { @@ -34,14 +35,14 @@ namespace cord_internal { // the contained checksum is the user's responsibility. struct CordRepCrc : public CordRep { CordRep* child; - uint32_t crc; + absl::crc_internal::CrcCordState crc_cord_state; // Consumes `child` and returns a CordRepCrc prefixed tree containing `child`. // If the specified `child` is itself a CordRepCrc node, then this method - // either replaces the existing node, or directly updates the crc value in it + // either replaces the existing node, or directly updates the crc state in it // depending on the node being shared or not, i.e.: refcount.IsOne(). - // `child` must not be null. Never returns null. - static CordRepCrc* New(CordRep* child, uint32_t crc); + // `child` must only be null if the Cord is empty. Never returns null. + static CordRepCrc* New(CordRep* child, crc_internal::CrcCordState state); // Destroys (deletes) the provided node. `node` must not be null. static void Destroy(CordRepCrc* node); diff --git a/absl/strings/internal/cord_rep_crc_test.cc b/absl/strings/internal/cord_rep_crc_test.cc index d73ea7b3..3d27c33c 100644 --- a/absl/strings/internal/cord_rep_crc_test.cc +++ b/absl/strings/internal/cord_rep_crc_test.cc @@ -17,6 +17,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_test_util.h" @@ -27,47 +28,51 @@ namespace { using ::absl::cordrep_testing::MakeFlat; using ::testing::Eq; +using ::testing::IsNull; using ::testing::Ne; #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST -TEST(CordRepCrc, NewWithNullPtr) { - EXPECT_DEATH(CordRepCrc::New(nullptr, 0), ""); -} - TEST(CordRepCrc, RemoveCrcWithNullptr) { EXPECT_DEATH(RemoveCrcNode(nullptr), ""); } #endif // !NDEBUG && GTEST_HAS_DEATH_TEST +absl::crc_internal::CrcCordState MakeCrcCordState(uint32_t crc) { + crc_internal::CrcCordState state; + state.mutable_rep()->prefix_crc.push_back( + crc_internal::CrcCordState::PrefixCrc(42, crc32c_t{crc})); + return state; +} + TEST(CordRepCrc, NewDestroy) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); EXPECT_TRUE(crc->refcount.IsOne()); EXPECT_THAT(crc->child, Eq(rep)); - EXPECT_THAT(crc->crc, Eq(12345u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); EXPECT_TRUE(rep->refcount.IsOne()); CordRepCrc::Destroy(crc); } TEST(CordRepCrc, NewExistingCrcNotShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); - CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); + CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321)); EXPECT_THAT(new_crc, Eq(crc)); EXPECT_TRUE(new_crc->refcount.IsOne()); EXPECT_THAT(new_crc->child, Eq(rep)); - EXPECT_THAT(new_crc->crc, Eq(54321u)); + EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u})); EXPECT_TRUE(rep->refcount.IsOne()); CordRepCrc::Destroy(new_crc); } TEST(CordRepCrc, NewExistingCrcShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep::Ref(crc); - CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321)); EXPECT_THAT(new_crc, Ne(crc)); EXPECT_TRUE(new_crc->refcount.IsOne()); @@ -75,13 +80,23 @@ TEST(CordRepCrc, NewExistingCrcShared) { EXPECT_FALSE(rep->refcount.IsOne()); EXPECT_THAT(crc->child, Eq(rep)); EXPECT_THAT(new_crc->child, Eq(rep)); - EXPECT_THAT(crc->crc, Eq(12345u)); - EXPECT_THAT(new_crc->crc, Eq(54321u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); + EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u})); CordRep::Unref(crc); CordRep::Unref(new_crc); } +TEST(CordRepCrc, NewEmpty) { + CordRepCrc* crc = CordRepCrc::New(nullptr, MakeCrcCordState(12345)); + EXPECT_TRUE(crc->refcount.IsOne()); + EXPECT_THAT(crc->child, IsNull()); + EXPECT_THAT(crc->length, Eq(0u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); + EXPECT_TRUE(crc->refcount.IsOne()); + CordRepCrc::Destroy(crc); +} + TEST(CordRepCrc, RemoveCrcNotCrc) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); CordRep* nocrc = RemoveCrcNode(rep); @@ -91,7 +106,7 @@ TEST(CordRepCrc, RemoveCrcNotCrc) { TEST(CordRepCrc, RemoveCrcNotShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep* nocrc = RemoveCrcNode(crc); EXPECT_THAT(nocrc, Eq(rep)); EXPECT_TRUE(rep->refcount.IsOne()); @@ -100,7 +115,7 @@ TEST(CordRepCrc, RemoveCrcNotShared) { TEST(CordRepCrc, RemoveCrcShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep::Ref(crc); CordRep* nocrc = RemoveCrcNode(crc); EXPECT_THAT(nocrc, Eq(rep)); diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc index 6d6feb52..53d2f2ea 100644 --- a/absl/strings/internal/cordz_info_statistics_test.cc +++ b/absl/strings/internal/cordz_info_statistics_test.cc @@ -19,6 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/cord.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" @@ -451,7 +452,8 @@ TEST(CordzInfoStatisticsTest, BtreeNodeShared) { TEST(CordzInfoStatisticsTest, Crc) { RefHelper ref; auto* left = Flat(1000); - auto* crc = ref.NeedsUnref(CordRepCrc::New(left, 12345)); + auto* crc = + ref.NeedsUnref(CordRepCrc::New(left, crc_internal::CrcCordState())); CordzStatistics expected; expected.size = left->length; diff --git a/absl/strings/internal/damerau_levenshtein_distance.cc b/absl/strings/internal/damerau_levenshtein_distance.cc index 7cc23acd..a084568f 100644 --- a/absl/strings/internal/damerau_levenshtein_distance.cc +++ b/absl/strings/internal/damerau_levenshtein_distance.cc @@ -31,8 +31,8 @@ namespace strings_internal { // detected. // When the distance is larger than cutoff, or one of the strings has more // than MAX_SIZE=100 characters, the code returns min(MAX_SIZE, cutoff) + 1. -size_t CappedDamerauLevenshteinDistance(absl::string_view s1, - absl::string_view s2, uint8_t cutoff) { +uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1, + absl::string_view s2, uint8_t cutoff) { const uint8_t MAX_SIZE = 100; const uint8_t _cutoff = std::min(MAX_SIZE, cutoff); const uint8_t cutoff_plus_1 = static_cast<uint8_t>(_cutoff + 1); @@ -42,7 +42,7 @@ size_t CappedDamerauLevenshteinDistance(absl::string_view s1, return cutoff_plus_1; if (s1.empty()) - return std::min(static_cast<size_t>(cutoff_plus_1), s2.size()); + return static_cast<uint8_t>(s2.size()); // Lower diagonal bound: y = x - lower_diag const uint8_t lower_diag = diff --git a/absl/strings/internal/damerau_levenshtein_distance.h b/absl/strings/internal/damerau_levenshtein_distance.h index b9bb6fe1..1a968425 100644 --- a/absl/strings/internal/damerau_levenshtein_distance.h +++ b/absl/strings/internal/damerau_levenshtein_distance.h @@ -25,8 +25,8 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { // Calculate DamerauLevenshtein distance between two strings. // When the distance is larger than cutoff, the code just returns cutoff + 1. -size_t CappedDamerauLevenshteinDistance(absl::string_view s1, - absl::string_view s2, uint8_t cutoff); +uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1, + absl::string_view s2, uint8_t cutoff); } // namespace strings_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/damerau_levenshtein_distance_test.cc b/absl/strings/internal/damerau_levenshtein_distance_test.cc index 45cb5bd9..a342b7db 100644 --- a/absl/strings/internal/damerau_levenshtein_distance_test.cc +++ b/absl/strings/internal/damerau_levenshtein_distance_test.cc @@ -24,76 +24,76 @@ namespace { using absl::strings_internal::CappedDamerauLevenshteinDistance; TEST(Distance, TestDistances) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 6), 0u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "b", 6), 1u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("ca", "abc", 6), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "ad", 6), 2u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "cadb", 6), 4u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "bdac", 6), 4u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 0), 0u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("", "", 0), 0u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 6), uint8_t{0}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "b", 6), uint8_t{1}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ca", "abc", 6), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "ad", 6), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "cadb", 6), uint8_t{4}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "bdac", 6), uint8_t{4}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 0), uint8_t{0}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("", "", 0), uint8_t{0}); // combinations for 3-character strings: // 1, 2, 3 removals, insertions or replacements and transpositions - EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", "abc", 6), 0u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", "abc", 6), uint8_t{0}); for (auto res : {"", "ca", "efg", "ea", "ce", "ceb", "eca", "cae", "cea", "bea"}) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), 3u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{3}); } for (auto res : {"a", "b", "c", "ba", "cb", "bca", "cab", "cba", "ace", "efc", "ebf", "aef", "ae", "be", "eb", "ec", "ecb", "bec", "bce", "cbe", "ace", "eac", "aeb", "bae", "eab", "eba"}) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), 2u); - EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), 2u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{2}); } for (auto res : {"ab", "ac", "bc", "acb", "bac", "ebc", "aec", "abe"}) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), 1u); - EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), 1u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{1}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{1}); } } TEST(Distance, TestCutoff) { // Returing cutoff + 1 if the value is larger than cutoff or string longer // than MAX_SIZE. - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 1), 2u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcdefg", "a", 2), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "abcde", 2), 3u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 1), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcdefg", "a", 2), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "abcde", 2), uint8_t{3}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(102, 'a'), std::string(102, 'a'), 105), - 101u); + uint8_t{101}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(100, 'a'), 100), - 0u); + uint8_t{0}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(100, 'b'), 100), - 100u); + uint8_t{100}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(99, 'a'), 2), - 1u); + uint8_t{1}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(101, 'a'), 2), - 3u); + uint8_t{3}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(101, 'a'), 2), - 3u); + uint8_t{3}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX + 1, 'a'), std::string(UINT8_MAX + 1, 'b'), UINT8_MAX), - 101u); + uint8_t{101}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'), std::string(UINT8_MAX - 1, 'b'), UINT8_MAX), - 101u); + uint8_t{101}); EXPECT_THAT( CappedDamerauLevenshteinDistance(std::string(UINT8_MAX, 'a'), std::string(UINT8_MAX, 'b'), UINT8_MAX), - 101u); + uint8_t{101}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'), std::string(UINT8_MAX - 1, 'a'), UINT8_MAX), - 101u); + uint8_t{101}); } } // namespace diff --git a/absl/strings/internal/has_absl_stringify.h b/absl/strings/internal/has_absl_stringify.h new file mode 100644 index 00000000..55a08508 --- /dev/null +++ b/absl/strings/internal/has_absl_stringify.h @@ -0,0 +1,55 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ +#define ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ +#include <string> +#include <type_traits> +#include <utility> + +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace strings_internal { + +// This is an empty class not intended to be used. It exists so that +// `HasAbslStringify` can reference a universal class rather than needing to be +// copied for each new sink. +class UnimplementedSink { + public: + void Append(size_t count, char ch); + + void Append(string_view v); + + // Support `absl::Format(&sink, format, args...)`. + friend void AbslFormatFlush(UnimplementedSink* sink, absl::string_view v); +}; + +template <typename T, typename = void> +struct HasAbslStringify : std::false_type {}; + +template <typename T> +struct HasAbslStringify< + T, std::enable_if_t<std::is_void<decltype(AbslStringify( + std::declval<strings_internal::UnimplementedSink&>(), + std::declval<const T&>()))>::value>> : std::true_type {}; + +} // namespace strings_internal + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index 967fe9ca..018dd052 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -297,6 +297,37 @@ constexpr auto ConvertV(T) { } template <typename T> +bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::v) { + conv.set_conversion_char(FormatConversionCharInternal::g); + } + + return FormatConversionCharIsFloat(conv.conversion_char()) && + ConvertFloatImpl(v, conv, sink); +} + +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.is_basic()) { + sink->Append(v); + return true; + } + return sink->PutPaddedString(v, conv.width(), conv.precision(), + conv.has_left_flag()); +} + +} // namespace + +bool ConvertBoolArg(bool v, FormatSinkImpl *sink) { + if (v) { + sink->Append("true"); + } else { + sink->Append("false"); + } + return true; +} + +template <typename T> bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { using U = typename MakeUnsigned<T>::type; IntDigits as_digits; @@ -354,36 +385,37 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return ConvertIntImplInnerSlow(as_digits, conv, sink); } -template <typename T> -bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { - if (conv.conversion_char() == FormatConversionCharInternal::v) { - conv.set_conversion_char(FormatConversionCharInternal::g); - } - - return FormatConversionCharIsFloat(conv.conversion_char()) && - ConvertFloatImpl(v, conv, sink); -} - -inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, - FormatSinkImpl *sink) { - if (conv.is_basic()) { - sink->Append(v); - return true; - } - return sink->PutPaddedString(v, conv.width(), conv.precision(), - conv.has_left_flag()); -} - -} // namespace - -bool ConvertBoolArg(bool v, FormatSinkImpl *sink) { - if (v) { - sink->Append("true"); - } else { - sink->Append("false"); - } - return true; -} +template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<signed char>(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned char>(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<short>(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned short>(unsigned short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned int>(unsigned int v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<long>(long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<long long>(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned long long>(unsigned long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); // ==================== Strings ==================== StringConvertResult FormatConvertImpl(const std::string &v, diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index b3e4ff15..e4b16628 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -18,6 +18,7 @@ #include <string.h> #include <wchar.h> +#include <algorithm> #include <cstdio> #include <iomanip> #include <limits> @@ -25,10 +26,12 @@ #include <sstream> #include <string> #include <type_traits> +#include <utility> #include "absl/base/port.h" #include "absl/meta/type_traits.h" #include "absl/numeric/int128.h" +#include "absl/strings/internal/has_absl_stringify.h" #include "absl/strings/internal/str_format/extension.h" #include "absl/strings/string_view.h" @@ -50,6 +53,19 @@ struct ArgConvertResult { bool value; }; +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar, + FormatConversionCharSetInternal::v)>; +using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::kFloating, + FormatConversionCharSetInternal::v)>; +using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; + template <typename T, typename = void> struct HasUserDefinedConvert : std::false_type {}; @@ -67,6 +83,44 @@ void AbslFormatConvert(); void AbslStringify(); template <typename T> +bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +// Forward declarations of internal `ConvertIntArg` function template +// instantiations are here to avoid including the template body in the headers +// and instantiating it in large numbers of translation units. Explicit +// instantiations can be found in "absl/strings/internal/str_format/arg.cc" +extern template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<signed char>(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned char>(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<short>(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned short>( // NOLINT + unsigned short v, FormatConversionSpecImpl conv, // NOLINT + FormatSinkImpl* sink); +extern template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned int>(unsigned int v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<long>( // NOLINT + long v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // NOLINT +extern template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<long long>(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned long long>( // NOLINT + unsigned long long v, FormatConversionSpecImpl conv, // NOLINT + FormatSinkImpl* sink); + +template <typename T> auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, FormatSinkImpl* sink) -> decltype(AbslFormatConvert(v, @@ -82,10 +136,30 @@ auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, } template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> std::enable_if_t<std::is_enum<T>::value && + std::is_void<decltype(AbslStringify( + std::declval<FormatSink&>(), v))>::value, + IntegralConvertResult> { + if (conv.conversion_char() == FormatConversionCharInternal::v) { + using FormatSinkT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fs = sink->Wrap<FormatSinkT>(); + AbslStringify(fs, v); + return {true}; + } else { + return {ConvertIntArg( + static_cast<typename std::underlying_type<T>::type>(v), conv, sink)}; + } +} + +template <typename T> auto FormatConvertImpl(const T& v, FormatConversionSpecImpl, FormatSinkImpl* sink) - -> std::enable_if_t<std::is_void<decltype(AbslStringify( - std::declval<FormatSink&>(), v))>::value, + -> std::enable_if_t<!std::is_enum<T>::value && + std::is_void<decltype(AbslStringify( + std::declval<FormatSink&>(), v))>::value, ArgConvertResult<FormatConversionCharSetInternal::v>> { using FormatSinkT = absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; @@ -191,19 +265,6 @@ StringConvertResult FormatConvertImpl(const AbslCord& value, return {true}; } -using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::c, - FormatConversionCharSetInternal::kNumeric, - FormatConversionCharSetInternal::kStar, - FormatConversionCharSetInternal::v)>; -using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::kFloating, - FormatConversionCharSetInternal::v)>; -using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::c, - FormatConversionCharSetInternal::kNumeric, - FormatConversionCharSetInternal::kStar)>; - bool ConvertBoolArg(bool v, FormatSinkImpl* sink); // Floats. @@ -271,7 +332,8 @@ IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, // FormatArgImpl will use the underlying Convert functions instead. template <typename T> typename std::enable_if<std::is_enum<T>::value && - !HasUserDefinedConvert<T>::value, + !HasUserDefinedConvert<T>::value && + !strings_internal::HasAbslStringify<T>::value, IntegralConvertResult>::type FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); @@ -384,7 +446,8 @@ class FormatArgImpl { template <typename T, typename = void> struct DecayType { static constexpr bool kHasUserDefined = - str_format_internal::HasUserDefinedConvert<T>::value; + str_format_internal::HasUserDefinedConvert<T>::value || + strings_internal::HasAbslStringify<T>::value; using type = typename std::conditional< !kHasUserDefined && std::is_convertible<T, const char*>::value, const char*, @@ -396,6 +459,7 @@ class FormatArgImpl { struct DecayType<T, typename std::enable_if< !str_format_internal::HasUserDefinedConvert<T>::value && + !strings_internal::HasAbslStringify<T>::value && std::is_enum<T>::value>::type> { using type = typename std::underlying_type<T>::type; }; diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index aeb9d48d..eab6ab9d 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -15,8 +15,11 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#include <algorithm> + #include "absl/base/attributes.h" #include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" // Compile time check support for entry points. @@ -36,333 +39,56 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -constexpr bool AllOf() { return true; } - -template <typename... T> -constexpr bool AllOf(bool b, T... t) { - return b && AllOf(t...); -} - #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER -constexpr bool ContainsChar(const char* chars, char c) { - return *chars == c || (*chars && ContainsChar(chars + 1, c)); -} - -// A constexpr compatible list of Convs. -struct ConvList { - const FormatConversionCharSet* array; - int count; - - // We do the bound check here to avoid having to do it on the callers. - // Returning an empty FormatConversionCharSet has the same effect as - // short circuiting because it will never match any conversion. - constexpr FormatConversionCharSet operator[](int i) const { - return i < count ? array[i] : FormatConversionCharSet{}; - } - - constexpr ConvList without_front() const { - return count != 0 ? ConvList{array + 1, count - 1} : *this; - } -}; - -template <size_t count> -struct ConvListT { - // Make sure the array has size > 0. - FormatConversionCharSet list[count ? count : 1]; -}; - -constexpr char GetChar(string_view str, size_t index) { - return index < str.size() ? str[index] : char{}; -} - -constexpr string_view ConsumeFront(string_view str, size_t len = 1) { - return len <= str.size() ? string_view(str.data() + len, str.size() - len) - : string_view(); -} - -constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { - while (ContainsChar(chars, GetChar(format, 0))) { - format = ConsumeFront(format); - } - return format; -} - -constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } - -// Helper class for the ParseDigits function. -// It encapsulates the two return values we need there. -struct Integer { - string_view format; - int value; - - // If the next character is a '$', consume it. - // Otherwise, make `this` an invalid positional argument. - constexpr Integer ConsumePositionalDollar() const { - if (GetChar(format, 0) == '$') { - return Integer{ConsumeFront(format), value}; - } else { - return Integer{format, 0}; - } - } -}; - -constexpr Integer ParseDigits(string_view format) { - int value = 0; - while (IsDigit(GetChar(format, 0))) { - value = 10 * value + GetChar(format, 0) - '0'; - format = ConsumeFront(format); - } - - return Integer{format, value}; -} - -// Parse digits for a positional argument. -// The parsing also consumes the '$'. -constexpr Integer ParsePositional(string_view format) { - return ParseDigits(format).ConsumePositionalDollar(); -} - -// Parses a single conversion specifier. -// See ConvParser::Run() for post conditions. -class ConvParser { - constexpr ConvParser SetFormat(string_view format) const { - return ConvParser(format, args_, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetArgs(ConvList args) const { - return ConvParser(format_, args, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetError(bool error) const { - return ConvParser(format_, args_, error_ || error, arg_position_, - is_positional_); - } - - constexpr ConvParser SetArgPosition(int arg_position) const { - return ConvParser(format_, args_, error_, arg_position, is_positional_); - } - - // Consumes the next arg and verifies that it matches `conv`. - // `error_` is set if there is no next arg or if it doesn't match `conv`. - constexpr ConvParser ConsumeNextArg(char conv) const { - return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); - } - - // Verify that positional argument `i.value` matches `conv`. - // `error_` is set if `i.value` is not a valid argument or if it doesn't - // match. - constexpr ConvParser VerifyPositional(Integer i, char conv) const { - return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); - } - - // Parse the position of the arg and store it in `arg_position_`. - constexpr ConvParser ParseArgPosition(Integer arg) const { - return SetFormat(arg.format).SetArgPosition(arg.value); - } - - // Consume the flags. - constexpr ConvParser ParseFlags() const { - return SetFormat(ConsumeAnyOf(format_, "-+ #0")); - } - - // Consume the width. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParseWidth() const { - char first_char = GetChar(format_, 0); - - if (IsDigit(first_char)) { - return SetFormat(ParseDigits(format_).format); - } else if (first_char == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_)), '*'); - } else { - return SetFormat(ConsumeFront(format_)).ConsumeNextArg('*'); - } - } else { - return *this; +template <FormatConversionCharSet... C> +constexpr bool ValidFormatImpl(string_view format) { + int next_arg = 0; + const char* p = format.data(); + const char* const end = p + format.size(); + constexpr FormatConversionCharSet + kAllowedConvs[(std::max)(sizeof...(C), size_t{1})] = {C...}; + bool used[(std::max)(sizeof...(C), size_t{1})]{}; + constexpr int kNumArgs = sizeof...(C); + while (p != end) { + while (p != end && *p != '%') ++p; + if (p == end) { + break; } - } - - // Consume the precision. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParsePrecision() const { - if (GetChar(format_, 0) != '.') { - return *this; - } else if (GetChar(format_, 1) == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_, 2)), '*'); - } else { - return SetFormat(ConsumeFront(format_, 2)).ConsumeNextArg('*'); - } - } else { - return SetFormat(ParseDigits(ConsumeFront(format_)).format); + if (p + 1 >= end) return false; + if (p[1] == '%') { + // %% + p += 2; + continue; } - } - - // Consume the length characters. - constexpr ConvParser ParseLength() const { - return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); - } - - // Consume the conversion character and verify that it matches `args_`. - // `error_` is set if it doesn't match. - constexpr ConvParser ParseConversion() const { - char first_char = GetChar(format_, 0); - if (first_char == 'v' && *(format_.data() - 1) != '%') { - return SetError(true); + UnboundConversion conv(absl::kConstInit); + p = ConsumeUnboundConversion(p + 1, end, &conv, &next_arg); + if (p == nullptr) return false; + if (conv.arg_position <= 0 || conv.arg_position > kNumArgs) { + return false; } - - if (is_positional_) { - return VerifyPositional({ConsumeFront(format_), arg_position_}, - first_char); - } else { - return ConsumeNextArg(first_char).SetFormat(ConsumeFront(format_)); + if (!Contains(kAllowedConvs[conv.arg_position - 1], conv.conv)) { + return false; } - } - - constexpr ConvParser(string_view format, ConvList args, bool error, - int arg_position, bool is_positional) - : format_(format), - args_(args), - error_(error), - arg_position_(arg_position), - is_positional_(is_positional) {} - - public: - constexpr ConvParser(string_view format, ConvList args, bool is_positional) - : format_(format), - args_(args), - error_(false), - arg_position_(0), - is_positional_(is_positional) {} - - // Consume the whole conversion specifier. - // `format()` will be set to the character after the conversion character. - // `error()` will be set if any of the arguments do not match. - constexpr ConvParser Run() const { - ConvParser parser = *this; - - if (is_positional_) { - parser = ParseArgPosition(ParsePositional(format_)); - } - - return parser.ParseFlags() - .ParseWidth() - .ParsePrecision() - .ParseLength() - .ParseConversion(); - } - - constexpr string_view format() const { return format_; } - constexpr ConvList args() const { return args_; } - constexpr bool error() const { return error_; } - constexpr bool is_positional() const { return is_positional_; } - - private: - string_view format_; - // Current list of arguments. If we are not in positional mode we will consume - // from the front. - ConvList args_; - bool error_; - // Holds the argument position of the conversion character, if we are in - // positional mode. Otherwise, it is unspecified. - int arg_position_; - // Whether we are in positional mode. - // It changes the behavior of '*' and where to find the converted argument. - bool is_positional_; -}; - -// Parses a whole format expression. -// See FormatParser::Run(). -class FormatParser { - static constexpr bool FoundPercent(string_view format) { - return format.empty() || - (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); - } - - // We use an inner function to increase the recursion limit. - // The inner function consumes up to `limit` characters on every run. - // This increases the limit from 512 to ~512*limit. - static constexpr string_view ConsumeNonPercentInner(string_view format) { - int limit = 20; - while (!FoundPercent(format) && limit != 0) { - size_t len = 0; - - if (GetChar(format, 0) == '%' && GetChar(format, 1) == '%') { - len = 2; - } else { - len = 1; + used[conv.arg_position - 1] = true; + for (auto extra : {conv.width, conv.precision}) { + if (extra.is_from_arg()) { + int pos = extra.get_from_arg(); + if (pos <= 0 || pos > kNumArgs) return false; + used[pos - 1] = true; + if (!Contains(kAllowedConvs[pos - 1], '*')) { + return false; + } } - - format = ConsumeFront(format, len); - --limit; } - - return format; } - - // Consume characters until the next conversion spec %. - // It skips %%. - static constexpr string_view ConsumeNonPercent(string_view format) { - while (!FoundPercent(format)) { - format = ConsumeNonPercentInner(format); + if (sizeof...(C) != 0) { + for (bool b : used) { + if (!b) return false; } - - return format; - } - - static constexpr bool IsPositional(string_view format) { - while (IsDigit(GetChar(format, 0))) { - format = ConsumeFront(format); - } - - return GetChar(format, 0) == '$'; } - - constexpr bool RunImpl(bool is_positional) const { - // In non-positional mode we require all arguments to be consumed. - // In positional mode just reaching the end of the format without errors is - // enough. - return (format_.empty() && (is_positional || args_.count == 0)) || - (!format_.empty() && - ValidateArg( - ConvParser(ConsumeFront(format_), args_, is_positional).Run())); - } - - constexpr bool ValidateArg(ConvParser conv) const { - return !conv.error() && FormatParser(conv.format(), conv.args()) - .RunImpl(conv.is_positional()); - } - - public: - constexpr FormatParser(string_view format, ConvList args) - : format_(ConsumeNonPercent(format)), args_(args) {} - - // Runs the parser for `format` and `args`. - // It verifies that the format is valid and that all conversion specifiers - // match the arguments passed. - // In non-positional mode it also verfies that all arguments are consumed. - constexpr bool Run() const { - return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); - } - - private: - string_view format_; - // Current list of arguments. - // If we are not in positional mode we will consume from the front and will - // have to be empty in the end. - ConvList args_; -}; - -template <FormatConversionCharSet... C> -constexpr bool ValidFormatImpl(string_view format) { - return FormatParser(format, - {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) - .Run(); + return true; } #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc index 680517f7..a86bed38 100644 --- a/absl/strings/internal/str_format/checker_test.cc +++ b/absl/strings/internal/str_format/checker_test.cc @@ -93,6 +93,7 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<void (*)(), volatile int*>("%p %p"), // ValidFormat<string_view, const char*, double, void*>( "string_view=%s const char*=%s double=%f void*=%p)"), + ValidFormat<int>("%v"), // ValidFormat<int>("%% %1$d"), // ValidFormat<int>("%1$ld"), // @@ -109,7 +110,9 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int, double>("%2$.*1$f"), // ValidFormat<void*, string_view, const char*, double>( "string_view=%2$s const char*=%3$s double=%4$f void*=%1$p " - "repeat=%3$s)")}; + "repeat=%3$s)"), + ValidFormat<std::string>("%1$v"), + }; for (Case c : trues) { EXPECT_TRUE(c.result) << c.format; @@ -130,6 +133,8 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int>("%*d"), // ValidFormat<std::string>("%p"), // ValidFormat<int (*)(int)>("%d"), // + ValidFormat<int>("%1v"), // + ValidFormat<int>("%.1v"), // ValidFormat<>("%3$d"), // ValidFormat<>("%1$r"), // @@ -138,13 +143,14 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int>("%1$*2$1d"), // ValidFormat<int>("%1$1-d"), // ValidFormat<std::string, int>("%2$*1$s"), // - ValidFormat<std::string>("%1$p"), + ValidFormat<std::string>("%1$p"), // + ValidFormat<int>("%1$*2$v"), // ValidFormat<int, int>("%d %2$d"), // }; for (Case c : falses) { - EXPECT_FALSE(c.result) << c.format; + EXPECT_FALSE(c.result) << "format<" << c.format << ">"; } } diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h new file mode 100644 index 00000000..3dc1776b --- /dev/null +++ b/absl/strings/internal/str_format/constexpr_parser.h @@ -0,0 +1,351 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ + +#include <cassert> +#include <cstdint> +#include <limits> + +#include "absl/base/const_init.h" +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; + +// The analyzed properties of a single specified conversion. +struct UnboundConversion { + // This is a user defined default constructor on purpose to skip the + // initialization of parts of the object that are not necessary. + UnboundConversion() {} // NOLINT + + // This constructor is provided for the static checker. We don't want to do + // the unnecessary initialization in the normal case. + explicit constexpr UnboundConversion(absl::ConstInitType) + : arg_position{}, width{}, precision{} {} + + class InputValue { + public: + constexpr void set_value(int value) { + assert(value >= 0); + value_ = value; + } + constexpr int value() const { return value_; } + + // Marks the value as "from arg". aka the '*' format. + // Requires `value >= 1`. + // When set, is_from_arg() return true and get_from_arg() returns the + // original value. + // `value()`'s return value is unspecified in this state. + constexpr void set_from_arg(int value) { + assert(value > 0); + value_ = -value - 1; + } + constexpr bool is_from_arg() const { return value_ < -1; } + constexpr int get_from_arg() const { + assert(is_from_arg()); + return -value_ - 1; + } + + private: + int value_ = -1; + }; + + // No need to initialize. It will always be set in the parser. + int arg_position; + + InputValue width; + InputValue precision; + + Flags flags = Flags::kBasic; + LengthMod length_mod = LengthMod::none; + FormatConversionChar conv = FormatConversionCharInternal::kNone; +}; + +// Helper tag class for the table below. +// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and +// conversions. +class ConvTag { + public: + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<uint8_t>(conversion_char)) {} + constexpr ConvTag(LengthMod length_mod) // NOLINT + : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} + constexpr ConvTag(Flags flags) // NOLINT + : tag_(0xc0 | static_cast<uint8_t>(flags)) {} + constexpr ConvTag() : tag_(0xFF) {} + + constexpr bool is_conv() const { return (tag_ & 0x80) == 0; } + constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; } + constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } + + constexpr FormatConversionChar as_conv() const { + assert(is_conv()); + assert(!is_length()); + assert(!is_flags()); + return static_cast<FormatConversionChar>(tag_); + } + constexpr LengthMod as_length() const { + assert(!is_conv()); + assert(is_length()); + assert(!is_flags()); + return static_cast<LengthMod>(tag_ & 0x3F); + } + constexpr Flags as_flags() const { + assert(!is_conv()); + assert(!is_length()); + assert(is_flags()); + return static_cast<Flags>(tag_ & 0x1F); + } + + private: + uint8_t tag_; +}; + +struct ConvTagHolder { + using CC = FormatConversionCharInternal; + using LM = LengthMod; + + // Abbreviations to fit in the table below. + static constexpr auto kFSign = Flags::kSignCol; + static constexpr auto kFAlt = Flags::kAlt; + static constexpr auto kFPos = Flags::kShowPos; + static constexpr auto kFLeft = Flags::kLeft; + static constexpr auto kFZero = Flags::kZero; + + static constexpr ConvTag value[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&' + {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./ + kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567 + {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff + }; +}; + +// Keep a single table for all the conversion chars and length modifiers. +constexpr ConvTag GetTagForChar(char c) { + return ConvTagHolder::value[static_cast<unsigned char>(c)]; +} + +constexpr bool CheckFastPathSetting(const UnboundConversion& conv) { + bool width_precision_needed = + conv.width.value() >= 0 || conv.precision.value() >= 0; + if (width_precision_needed && conv.flags == Flags::kBasic) { +#if defined(__clang__) + // Some compilers complain about this in constexpr even when not executed, + // so only enable the error dump in clang. + fprintf(stderr, + "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " + "width=%d precision=%d\n", + conv.flags == Flags::kBasic ? 1 : 0, + FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, + FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, + FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, + FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, + FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), + conv.precision.value()); +#endif // defined(__clang__) + return false; + } + return true; +} + +constexpr int ParseDigits(char& c, const char*& pos, const char* const end) { + int digits = c - '0'; + // We do not want to overflow `digits` so we consume at most digits10 + // digits. If there are more digits the parsing will fail later on when the + // digit doesn't match the expected characters. + int num_digits = std::numeric_limits<int>::digits10; + for (;;) { + if (ABSL_PREDICT_FALSE(pos == end)) break; + c = *pos++; + if ('0' > c || c > '9') break; + --num_digits; + if (ABSL_PREDICT_FALSE(!num_digits)) break; + digits = 10 * digits + c - '0'; + } + return digits; +} + +template <bool is_positional> +constexpr const char* ConsumeConversion(const char* pos, const char* const end, + UnboundConversion* conv, + int* next_arg) { + const char* const original_pos = pos; + char c = 0; + // Read the next char into `c` and update `pos`. Returns false if there are + // no more chars to read. +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ + } while (0) + + if (is_positional) { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->arg_position = ParseDigits(c, pos, end); + assert(conv->arg_position > 0); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + } + + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + + // We should start with the basic flag on. + assert(conv->flags == Flags::kBasic); + + // Any non alpha character makes this conversion not basic. + // This includes flags (-+ #0), width (1-9, *) or precision (.). + // All conversion characters and length modifiers are alpha characters. + if (c < 'A') { + while (c <= '0') { + auto tag = GetTagForChar(c); + if (tag.is_flags()) { + conv->flags = conv->flags | tag.as_flags(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + break; + } + } + + if (c <= '9') { + if (c >= '0') { + int maybe_width = ParseDigits(c, pos, end); + if (!is_positional && c == '$') { + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; + // Positional conversion. + *next_arg = -1; + return ConsumeConversion<true>(original_pos, end, conv, next_arg); + } + conv->flags = conv->flags | Flags::kNonBasic; + conv->width.set_value(maybe_width); + } else if (c == '*') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->width.set_from_arg(ParseDigits(c, pos, end)); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->width.set_from_arg(++*next_arg); + } + } + } + + if (c == '.') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if ('0' <= c && c <= '9') { + conv->precision.set_value(ParseDigits(c, pos, end)); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->precision.set_from_arg(ParseDigits(c, pos, end)); + if (c != '$') return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->precision.set_from_arg(++*next_arg); + } + } else { + conv->precision.set_value(0); + } + } + } + + auto tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) { + return nullptr; + } + + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; + + // It is a length modifier. + using str_format_internal::LengthMod; + LengthMod length_mod = tag.as_length(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (c == 'h' && length_mod == LengthMod::h) { + conv->length_mod = LengthMod::hh; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else if (c == 'l' && length_mod == LengthMod::l) { + conv->length_mod = LengthMod::ll; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->length_mod = length_mod; + } + tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; + } + + assert(CheckFastPathSetting(*conv)); + (void)(&CheckFastPathSetting); + + conv->conv = tag.as_conv(); + if (!is_positional) conv->arg_position = ++*next_arg; + return pos; +} + +// Consume conversion spec prefix (not including '%') of [p, end) if valid. +// Examples of valid specs would be e.g.: "s", "d", "-12.6f". +// If valid, it returns the first character following the conversion spec, +// and the spec part is broken down and returned in 'conv'. +// If invalid, returns nullptr. +constexpr const char* ConsumeUnboundConversion(const char* p, const char* end, + UnboundConversion* conv, + int* next_arg) { + if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); + return ConsumeConversion<false>(p, end, conv, next_arg); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index 13731ee2..5aaab698 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -31,211 +31,14 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = FormatConversionCharInternal; -using LM = LengthMod; +// Define the array for non-constexpr uses. +constexpr ConvTag ConvTagHolder::value[256]; -// Abbreviations to fit in the table below. -constexpr auto f_sign = Flags::kSignCol; -constexpr auto f_alt = Flags::kAlt; -constexpr auto f_pos = Flags::kShowPos; -constexpr auto f_left = Flags::kLeft; -constexpr auto f_zero = Flags::kZero; - -ABSL_CONST_INIT const ConvTag kTags[256] = { - {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 - {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f - {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 - {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f - f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&' - {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./ - f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567 - {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? - {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG - {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO - {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW - CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ - {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno - CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw - CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! - {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 - {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f - {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 - {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f - {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 - {}, {}, {}, {}, {}, {}, {}, {}, // a8-af - {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 - {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf - {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 - {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf - {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 - {}, {}, {}, {}, {}, {}, {}, {}, // d8-df - {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 - {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef - {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 - {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff -}; - -namespace { - -bool CheckFastPathSetting(const UnboundConversion& conv) { - bool width_precision_needed = - conv.width.value() >= 0 || conv.precision.value() >= 0; - if (width_precision_needed && conv.flags == Flags::kBasic) { - fprintf(stderr, - "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " - "width=%d precision=%d\n", - conv.flags == Flags::kBasic ? 1 : 0, - FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, - FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, - FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, - FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, - FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), - conv.precision.value()); - return false; - } - return true; -} - -template <bool is_positional> -const char *ConsumeConversion(const char *pos, const char *const end, - UnboundConversion *conv, int *next_arg) { - const char* const original_pos = pos; - char c; - // Read the next char into `c` and update `pos`. Returns false if there are - // no more chars to read. -#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ - do { \ - if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ - c = *pos++; \ - } while (0) - - const auto parse_digits = [&] { - int digits = c - '0'; - // We do not want to overflow `digits` so we consume at most digits10 - // digits. If there are more digits the parsing will fail later on when the - // digit doesn't match the expected characters. - int num_digits = std::numeric_limits<int>::digits10; - for (;;) { - if (ABSL_PREDICT_FALSE(pos == end)) break; - c = *pos++; - if (!std::isdigit(c)) break; - --num_digits; - if (ABSL_PREDICT_FALSE(!num_digits)) break; - digits = 10 * digits + c - '0'; - } - return digits; - }; - - if (is_positional) { - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->arg_position = parse_digits(); - assert(conv->arg_position > 0); - if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; - } - - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - - // We should start with the basic flag on. - assert(conv->flags == Flags::kBasic); - - // Any non alpha character makes this conversion not basic. - // This includes flags (-+ #0), width (1-9, *) or precision (.). - // All conversion characters and length modifiers are alpha characters. - if (c < 'A') { - while (c <= '0') { - auto tag = GetTagForChar(c); - if (tag.is_flags()) { - conv->flags = conv->flags | tag.as_flags(); - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - break; - } - } - - if (c <= '9') { - if (c >= '0') { - int maybe_width = parse_digits(); - if (!is_positional && c == '$') { - if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; - // Positional conversion. - *next_arg = -1; - return ConsumeConversion<true>(original_pos, end, conv, next_arg); - } - conv->flags = conv->flags | Flags::kNonBasic; - conv->width.set_value(maybe_width); - } else if (c == '*') { - conv->flags = conv->flags | Flags::kNonBasic; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->width.set_from_arg(parse_digits()); - if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->width.set_from_arg(++*next_arg); - } - } - } - - if (c == '.') { - conv->flags = conv->flags | Flags::kNonBasic; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (std::isdigit(c)) { - conv->precision.set_value(parse_digits()); - } else if (c == '*') { - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->precision.set_from_arg(parse_digits()); - if (c != '$') return nullptr; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->precision.set_from_arg(++*next_arg); - } - } else { - conv->precision.set_value(0); - } - } - } - - auto tag = GetTagForChar(c); - - if (ABSL_PREDICT_FALSE(c == 'v' && (pos - original_pos) != 1)) return nullptr; - - if (ABSL_PREDICT_FALSE(!tag.is_conv())) { - if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; - - // It is a length modifier. - using str_format_internal::LengthMod; - LengthMod length_mod = tag.as_length(); - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (c == 'h' && length_mod == LengthMod::h) { - conv->length_mod = LengthMod::hh; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else if (c == 'l' && length_mod == LengthMod::l) { - conv->length_mod = LengthMod::ll; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->length_mod = length_mod; - } - tag = GetTagForChar(c); - - if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; - if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; - } - - assert(CheckFastPathSetting(*conv)); - (void)(&CheckFastPathSetting); - - conv->conv = tag.as_conv(); - if (!is_positional) conv->arg_position = ++*next_arg; - return pos; +ABSL_ATTRIBUTE_NOINLINE const char* ConsumeUnboundConversionNoInline( + const char* p, const char* end, UnboundConversion* conv, int* next_arg) { + return ConsumeUnboundConversion(p, end, conv, next_arg); } -} // namespace - std::string LengthModToString(LengthMod v) { switch (v) { case LengthMod::h: @@ -262,12 +65,6 @@ std::string LengthModToString(LengthMod v) { return ""; } -const char *ConsumeUnboundConversion(const char *p, const char *end, - UnboundConversion *conv, int *next_arg) { - if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); - return ConsumeConversion<false>(p, end, conv, next_arg); -} - struct ParsedFormatBase::ParsedFormatConsumer { explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat) : parsed(parsedformat), data_pos(parsedformat->data_.get()) {} diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index a81bac83..35b6d49c 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -29,111 +29,18 @@ #include <vector> #include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; - std::string LengthModToString(LengthMod v); -// The analyzed properties of a single specified conversion. -struct UnboundConversion { - UnboundConversion() {} - - class InputValue { - public: - void set_value(int value) { - assert(value >= 0); - value_ = value; - } - int value() const { return value_; } - - // Marks the value as "from arg". aka the '*' format. - // Requires `value >= 1`. - // When set, is_from_arg() return true and get_from_arg() returns the - // original value. - // `value()`'s return value is unspecfied in this state. - void set_from_arg(int value) { - assert(value > 0); - value_ = -value - 1; - } - bool is_from_arg() const { return value_ < -1; } - int get_from_arg() const { - assert(is_from_arg()); - return -value_ - 1; - } - - private: - int value_ = -1; - }; - - // No need to initialize. It will always be set in the parser. - int arg_position; - - InputValue width; - InputValue precision; - - Flags flags = Flags::kBasic; - LengthMod length_mod = LengthMod::none; - FormatConversionChar conv = FormatConversionCharInternal::kNone; -}; - -// Consume conversion spec prefix (not including '%') of [p, end) if valid. -// Examples of valid specs would be e.g.: "s", "d", "-12.6f". -// If valid, it returns the first character following the conversion spec, -// and the spec part is broken down and returned in 'conv'. -// If invalid, returns nullptr. -const char* ConsumeUnboundConversion(const char* p, const char* end, - UnboundConversion* conv, int* next_arg); - -// Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and -// conversions. -class ConvTag { - public: - constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT - : tag_(static_cast<uint8_t>(conversion_char)) {} - constexpr ConvTag(LengthMod length_mod) // NOLINT - : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} - constexpr ConvTag(Flags flags) // NOLINT - : tag_(0xc0 | static_cast<uint8_t>(flags)) {} - constexpr ConvTag() : tag_(0xFF) {} - - bool is_conv() const { return (tag_ & 0x80) == 0; } - bool is_length() const { return (tag_ & 0xC0) == 0x80; } - bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } - - FormatConversionChar as_conv() const { - assert(is_conv()); - assert(!is_length()); - assert(!is_flags()); - return static_cast<FormatConversionChar>(tag_); - } - LengthMod as_length() const { - assert(!is_conv()); - assert(is_length()); - assert(!is_flags()); - return static_cast<LengthMod>(tag_ & 0x3F); - } - Flags as_flags() const { - assert(!is_conv()); - assert(!is_length()); - assert(is_flags()); - return static_cast<Flags>(tag_ & 0x1F); - } - - private: - uint8_t tag_; -}; - -extern const ConvTag kTags[256]; -// Keep a single table for all the conversion chars and length modifiers. -inline ConvTag GetTagForChar(char c) { - return kTags[static_cast<unsigned char>(c)]; -} +const char* ConsumeUnboundConversionNoInline(const char* p, const char* end, + UnboundConversion* conv, + int* next_arg); // Parse the format string provided in 'src' and pass the identified items into // 'consumer'. @@ -187,7 +94,7 @@ bool ParseFormatString(string_view src, Consumer consumer) { } } else if (percent[1] != '%') { UnboundConversion conv; - p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); + p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg); if (ABSL_PREDICT_FALSE(p == nullptr)) return false; if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( conv, string_view(percent + 1, diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index c3e825fe..021f6a87 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -117,6 +117,7 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) { {__LINE__, "dzz", "d", "zz"}, // length mod as suffix {__LINE__, "3v", "", "3v"}, // 'v' cannot have modifiers {__LINE__, "hv", "", "hv"}, // 'v' cannot have modifiers + {__LINE__, "1$v", "1$v", ""}, // 'v' can have use posix syntax {__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed. {__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags diff --git a/absl/strings/internal/stringify_sink.cc b/absl/strings/internal/stringify_sink.cc new file mode 100644 index 00000000..7c6995ab --- /dev/null +++ b/absl/strings/internal/stringify_sink.cc @@ -0,0 +1,28 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/stringify_sink.h" +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +void StringifySink::Append(size_t count, char ch) { buffer_.append(count, ch); } + +void StringifySink::Append(string_view v) { + buffer_.append(v.data(), v.size()); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/stringify_sink.h b/absl/strings/internal/stringify_sink.h index a83f70e4..fc3747bb 100644 --- a/absl/strings/internal/stringify_sink.h +++ b/absl/strings/internal/stringify_sink.h @@ -31,8 +31,6 @@ class StringifySink { void Append(string_view v); - bool PutPaddedString(string_view v, int width, int precision, bool left); - // Support `absl::Format(&sink, format, args...)`. friend void AbslFormatFlush(StringifySink* sink, absl::string_view v) { sink->Append(v); @@ -51,15 +49,6 @@ string_view ExtractStringification(StringifySink& sink, const T& v) { return sink.buffer_; } -template <typename T, typename = void> -struct HasAbslStringify : std::false_type {}; - -template <typename T> -struct HasAbslStringify<T, std::enable_if_t<std::is_void<decltype(AbslStringify( - std::declval<strings_internal::StringifySink&>(), - std::declval<const T&>()))>::value>> - : std::true_type {}; - } // namespace strings_internal ABSL_NAMESPACE_END diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index 6981347a..e5cb6d84 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -30,39 +30,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN -namespace strings_internal { -void StringifySink::Append(size_t count, char ch) { buffer_.append(count, ch); } - -void StringifySink::Append(string_view v) { - buffer_.append(v.data(), v.size()); -} - -bool StringifySink::PutPaddedString(string_view v, int width, int precision, - bool left) { - size_t space_remaining = 0; - - if (width >= 0) space_remaining = static_cast<size_t>(width); - - size_t n = v.size(); - - if (precision >= 0) n = (std::min)(n, static_cast<size_t>(precision)); - - string_view shown(v.data(), n); - - if (shown.size() < space_remaining) { - space_remaining = space_remaining - shown.size(); - } else { - space_remaining = 0; - } - - if (!left) Append(space_remaining, ' '); - Append(shown); - if (left) Append(space_remaining, ' '); - return true; -} - -} // namespace strings_internal - AlphaNum::AlphaNum(Hex hex) { static_assert(numbers_internal::kFastToBufferSize >= 32, "This function only works when output buffer >= 32 bytes long"); diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index 1a37faae..5ee26db0 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -95,6 +95,7 @@ #include <vector> #include "absl/base/port.h" +#include "absl/strings/internal/has_absl_stringify.h" #include "absl/strings/internal/stringify_sink.h" #include "absl/strings/numbers.h" #include "absl/strings/string_view.h" @@ -317,7 +318,8 @@ class AlphaNum { // This overload matches only scoped enums. template <typename T, typename = typename std::enable_if< - std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type> + std::is_enum<T>{} && !std::is_convertible<T, int>{} && + !strings_internal::HasAbslStringify<T>::value>::type> AlphaNum(T e) // NOLINT(runtime/explicit) : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {} diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc index 1b3b7ece..c3fb3170 100644 --- a/absl/strings/str_cat_test.cc +++ b/absl/strings/str_cat_test.cc @@ -650,4 +650,16 @@ TEST(StrCat, AbslStringifyExampleUsingFormat) { EXPECT_EQ(absl::StrCat("a ", p, " z"), "a (10, 20) z"); } +enum class EnumWithStringify { Many = 0, Choices = 1 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithStringify e) { + absl::Format(&sink, "%s", e == EnumWithStringify::Many ? "Many" : "Choices"); +} + +TEST(StrCat, AbslStringifyWithEnum) { + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::StrCat(e), "Choices"); +} + } // namespace diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index 43d86186..3536b70e 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -191,9 +191,9 @@ class FormatCountCapture { // absl::StrFormat(formatString, "TheVillage", 6); // // A format string generally follows the POSIX syntax as used within the POSIX -// `printf` specification. +// `printf` specification. (Exceptions are noted below.) // -// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html.) +// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html) // // In specific, the `FormatSpec` supports the following type specifiers: // * `c` for characters @@ -211,6 +211,10 @@ class FormatCountCapture { // * `n` for the special case of writing out the number of characters // written to this point. The resulting value must be captured within an // `absl::FormatCountCapture` type. +// * `v` for values using the default format for a deduced type. These deduced +// types include many of the primitive types denoted here as well as +// user-defined types containing the proper extensions. (See below for more +// information.) // // Implementation-defined behavior: // * A null pointer provided to "%s" or "%p" is output as "(nil)". @@ -239,6 +243,15 @@ class FormatCountCapture { // "%s%d%n", "hello", 123, absl::FormatCountCapture(&n)); // EXPECT_EQ(8, n); // +// NOTE: the `v` specifier (for "value") is a type specifier not present in the +// POSIX specification. %v will format values according to their deduced type. +// `v` uses `d` for signed integer values, `u` for unsigned integer values, `g` +// for floating point values, and formats boolean values as "true"/"false" +// (instead of 1 or 0 for booleans formatted using d). `const char*` is not +// supported; please use `std:string` and `string_view`. `char` is also not +// supported due to ambiguity of the type. This specifier does not support +// modifiers. +// // The `FormatSpec` intrinsically supports all of these fundamental C++ types: // // * Characters: `char`, `signed char`, `unsigned char` @@ -807,17 +820,25 @@ enum class FormatConversionCharSet : uint64_t { // FormatSink // -// An abstraction to which conversions write their string data. +// A format sink is a generic abstraction to which conversions may write their +// formatted string data. `absl::FormatConvert()` uses this sink to write its +// formatted string. // class FormatSink { public: - // Appends `count` copies of `ch`. + // FormatSink::Append() + // + // Appends `count` copies of `ch` to the format sink. void Append(size_t count, char ch) { sink_->Append(count, ch); } + // Overload of FormatSink::Append() for appending the characters of a string + // view to a format sink. void Append(string_view v) { sink_->Append(v); } - // Appends the first `precision` bytes of `v`. If this is less than - // `width`, spaces will be appended first (if `left` is false), or + // FormatSink::PutPaddedString() + // + // Appends `precision` number of bytes of `v` to the format sink. If this is + // less than `width`, spaces will be appended first (if `left` is false), or // after (if `left` is true) to ensure the total amount appended is // at least `width`. bool PutPaddedString(string_view v, int width, int precision, bool left) { diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index 62ed262d..5198fb33 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -143,13 +143,20 @@ TEST_F(FormatEntryPointTest, AppendFormatFailWithV) { } TEST_F(FormatEntryPointTest, ManyArgs) { - EXPECT_EQ("24", StrFormat("%24$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); - EXPECT_EQ("60", StrFormat("%60$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, - 53, 54, 55, 56, 57, 58, 59, 60)); + EXPECT_EQ( + "60 59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42 41 40 39 38 37 " + "36 35 34 33 32 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 " + "12 11 10 9 8 7 6 5 4 3 2 1", + StrFormat("%60$d %59$d %58$d %57$d %56$d %55$d %54$d %53$d %52$d %51$d " + "%50$d %49$d %48$d %47$d %46$d %45$d %44$d %43$d %42$d %41$d " + "%40$d %39$d %38$d %37$d %36$d %35$d %34$d %33$d %32$d %31$d " + "%30$d %29$d %28$d %27$d %26$d %25$d %24$d %23$d %22$d %21$d " + "%20$d %19$d %18$d %17$d %16$d %15$d %14$d %13$d %12$d %11$d " + "%10$d %9$d %8$d %7$d %6$d %5$d %4$d %3$d %2$d %1$d", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); } TEST_F(FormatEntryPointTest, Preparsed) { @@ -1135,6 +1142,51 @@ TEST_F(FormatExtensionTest, AbslStringifyExampleUsingFormat) { EXPECT_EQ(absl::StrFormat("a %v z", p), "a (10, 20) z"); } +enum class EnumClassWithStringify { Many = 0, Choices = 1 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumClassWithStringify e) { + absl::Format(&sink, "%s", + e == EnumClassWithStringify::Many ? "Many" : "Choices"); +} + +enum EnumWithStringify { Many, Choices }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithStringify e) { + absl::Format(&sink, "%s", e == EnumWithStringify::Many ? "Many" : "Choices"); +} + +TEST_F(FormatExtensionTest, AbslStringifyWithEnumWithV) { + const auto e_class = EnumClassWithStringify::Choices; + EXPECT_EQ(absl::StrFormat("My choice is %v", e_class), + "My choice is Choices"); + + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::StrFormat("My choice is %v", e), "My choice is Choices"); +} + +TEST_F(FormatExtensionTest, AbslStringifyEnumWithD) { + const auto e_class = EnumClassWithStringify::Many; + EXPECT_EQ(absl::StrFormat("My choice is %d", e_class), "My choice is 0"); + + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::StrFormat("My choice is %d", e), "My choice is 1"); +} + +enum class EnumWithLargerValue { x = 32 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithLargerValue e) { + absl::Format(&sink, "%s", "Many"); +} + +TEST_F(FormatExtensionTest, AbslStringifyEnumOtherSpecifiers) { + const auto e = EnumWithLargerValue::x; + EXPECT_EQ(absl::StrFormat("My choice is %g", e), "My choice is 32"); + EXPECT_EQ(absl::StrFormat("My choice is %x", e), "My choice is 20"); +} + } // namespace // Some codegen thunks that we can use to easily dump the generated assembly for diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h index 5c3f6eff..d6a5a690 100644 --- a/absl/strings/substitute.h +++ b/absl/strings/substitute.h @@ -203,7 +203,8 @@ class Arg { // This overload matches only scoped enums. template <typename T, typename = typename std::enable_if< - std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type> + std::is_enum<T>{} && !std::is_convertible<T, int>{} && + !strings_internal::HasAbslStringify<T>::value>::type> Arg(T value) // NOLINT(google-explicit-constructor) : Arg(static_cast<typename std::underlying_type<T>::type>(value)) {} diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc index 9f04545f..9cb37c39 100644 --- a/absl/strings/substitute_test.cc +++ b/absl/strings/substitute_test.cc @@ -253,6 +253,18 @@ TEST(SubstituteTest, Enums) { ScopedEnumUInt16::kEnum1)); } +enum class EnumWithStringify { Many = 0, Choices = 1 }; + +template <typename Sink> +void AbslStringify(Sink& sink, EnumWithStringify e) { + sink.Append(e == EnumWithStringify::Many ? "Many" : "Choices"); +} + +TEST(SubstituteTest, AbslStringifyWithEnum) { + const auto e = EnumWithStringify::Choices; + EXPECT_EQ(absl::Substitute("$0", e), "Choices"); +} + #ifdef GTEST_HAS_DEATH_TEST TEST(SubstituteDeathTest, SubstituteDeath) { |