diff options
Diffstat (limited to 'absl/strings/internal')
22 files changed, 753 insertions, 761 deletions
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h index 5aabc1fc..70a90343 100644 --- a/absl/strings/internal/char_map.h +++ b/absl/strings/internal/char_map.h @@ -73,10 +73,10 @@ class Charmap { } // Containing all the chars in the C-string 's'. - // Note that this is expensively recursive because of the C++11 constexpr - // formulation. Use only in constexpr initializers. static constexpr Charmap FromString(const char* s) { - return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + Charmap ret; + while (*s) ret = ret | Char(*s++); + return ret; } // Containing all the chars in the closed interval [lo,hi]. diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index fcca3a28..6022c1df 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -225,7 +225,11 @@ struct CordRep { : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} // The following three fields have to be less than 32 bytes since - // that is the smallest supported flat node size. + // that is the smallest supported flat node size. Some code optimizations rely + // on the specific layout of these fields. Notably: the non-trivial field + // `refcount` being preceded by `length`, and being tailed by POD data + // members only. + // # LINT.IfChange size_t length; RefcountAndFlags refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. @@ -241,6 +245,7 @@ struct CordRep { // allocate room for these in the derived class, as not all compilers reuse // padding space from the base class (clang and gcc do, MSVC does not, etc) uint8_t storage[3]; + // # LINT.ThenChange(cord_rep_btree.h:copy_raw) // Returns true if this instance's tag matches the requested type. 
constexpr bool IsRing() const { return tag == RING; } diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc index 7ce36128..985f0724 100644 --- a/absl/strings/internal/cord_rep_btree.cc +++ b/absl/strings/internal/cord_rep_btree.cc @@ -502,7 +502,7 @@ OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { // open interval [begin, back) or [begin + 1, end) depending on `edge_type`. // We conveniently cover both case using a constexpr `shift` being 0 or 1 // as `end :== back + 1`. - result = {CopyRaw(), kCopied}; + result = {CopyRaw(length), kCopied}; constexpr int shift = edge_type == kFront ? 1 : 0; for (CordRep* r : Edges(begin() + shift, back() + shift)) { CordRep::Ref(r); diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h index eed5609e..4209e512 100644 --- a/absl/strings/internal/cord_rep_btree.h +++ b/absl/strings/internal/cord_rep_btree.h @@ -446,9 +446,9 @@ class CordRepBtree : public CordRep { template <EdgeType edge_type> static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); - // Creates a raw copy of this Btree node, copying all properties, but - // without adding any references to existing edges. - CordRepBtree* CopyRaw() const; + // Creates a raw copy of this Btree node with the specified length, copying + // all properties, but without adding any references to existing edges. + CordRepBtree* CopyRaw(size_t new_length) const; // Creates a full copy of this Btree node, adding a reference on all edges. 
CordRepBtree* Copy() const; @@ -666,15 +666,28 @@ inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { } } -inline CordRepBtree* CordRepBtree::CopyRaw() const { - auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); - memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); - new (&tree->refcount) RefcountAndFlags; +inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const { + CordRepBtree* tree = new CordRepBtree; + + // `length` and `refcount` are the first members of `CordRepBtree`. + // We initialize `length` using the given length, have `refcount` be set to + // ref = 1 through its default constructor, and copy all data beyond + // 'refcount' which starts with `tag` using a single memcpy: all contents + // except `refcount` is trivially copyable, and the compiler does not + // efficiently coalesce member-wise copy of these members. + // See https://gcc.godbolt.org/z/qY8zsca6z + // # LINT.IfChange(copy_raw) + tree->length = new_length; + uint8_t* dst = &tree->tag; + const uint8_t* src = &tag; + const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this); + memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset)); return tree; + // # LINT.ThenChange() } inline CordRepBtree* CordRepBtree::Copy() const { - CordRepBtree* tree = CopyRaw(); + CordRepBtree* tree = CopyRaw(length); for (CordRep* rep : Edges()) CordRep::Ref(rep); return tree; } @@ -683,8 +696,7 @@ inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, size_t new_length) const { assert(begin >= this->begin()); assert(begin <= this->end()); - CordRepBtree* tree = CopyRaw(); - tree->length = new_length; + CordRepBtree* tree = CopyRaw(new_length); tree->set_begin(begin); for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); return tree; @@ -694,8 +706,7 @@ inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, size_t new_length) const { assert(end <= capacity()); assert(end >= this->begin()); - CordRepBtree* tree = 
CopyRaw(); - tree->length = new_length; + CordRepBtree* tree = CopyRaw(new_length); tree->set_end(end); for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); return tree; diff --git a/absl/strings/internal/cord_rep_crc.cc b/absl/strings/internal/cord_rep_crc.cc index ee140354..dbe54cc4 100644 --- a/absl/strings/internal/cord_rep_crc.cc +++ b/absl/strings/internal/cord_rep_crc.cc @@ -16,6 +16,7 @@ #include <cassert> #include <cstdint> +#include <utility> #include "absl/base/config.h" #include "absl/strings/internal/cord_internal.h" @@ -24,11 +25,10 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { - assert(child != nullptr); - if (child->IsCrc()) { +CordRepCrc* CordRepCrc::New(CordRep* child, crc_internal::CrcCordState state) { + if (child != nullptr && child->IsCrc()) { if (child->refcount.IsOne()) { - child->crc()->crc = crc; + child->crc()->crc_cord_state = std::move(state); return child->crc(); } CordRep* old = child; @@ -37,15 +37,17 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { CordRep::Unref(old); } auto* new_cordrep = new CordRepCrc; - new_cordrep->length = child->length; + new_cordrep->length = child != nullptr ? 
child->length : 0; new_cordrep->tag = cord_internal::CRC; new_cordrep->child = child; - new_cordrep->crc = crc; + new_cordrep->crc_cord_state = std::move(state); return new_cordrep; } void CordRepCrc::Destroy(CordRepCrc* node) { - CordRep::Unref(node->child); + if (node->child != nullptr) { + CordRep::Unref(node->child); + } delete node; } diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h index 5294b0d1..379d7a60 100644 --- a/absl/strings/internal/cord_rep_crc.h +++ b/absl/strings/internal/cord_rep_crc.h @@ -20,6 +20,7 @@ #include "absl/base/config.h" #include "absl/base/optimization.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/internal/cord_internal.h" namespace absl { @@ -34,14 +35,14 @@ namespace cord_internal { // the contained checksum is the user's responsibility. struct CordRepCrc : public CordRep { CordRep* child; - uint32_t crc; + absl::crc_internal::CrcCordState crc_cord_state; // Consumes `child` and returns a CordRepCrc prefixed tree containing `child`. // If the specified `child` is itself a CordRepCrc node, then this method - // either replaces the existing node, or directly updates the crc value in it + // either replaces the existing node, or directly updates the crc state in it // depending on the node being shared or not, i.e.: refcount.IsOne(). - // `child` must not be null. Never returns null. - static CordRepCrc* New(CordRep* child, uint32_t crc); + // `child` must only be null if the Cord is empty. Never returns null. + static CordRepCrc* New(CordRep* child, crc_internal::CrcCordState state); // Destroys (deletes) the provided node. `node` must not be null. 
static void Destroy(CordRepCrc* node); diff --git a/absl/strings/internal/cord_rep_crc_test.cc b/absl/strings/internal/cord_rep_crc_test.cc index d73ea7b3..3d27c33c 100644 --- a/absl/strings/internal/cord_rep_crc_test.cc +++ b/absl/strings/internal/cord_rep_crc_test.cc @@ -17,6 +17,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_test_util.h" @@ -27,47 +28,51 @@ namespace { using ::absl::cordrep_testing::MakeFlat; using ::testing::Eq; +using ::testing::IsNull; using ::testing::Ne; #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST -TEST(CordRepCrc, NewWithNullPtr) { - EXPECT_DEATH(CordRepCrc::New(nullptr, 0), ""); -} - TEST(CordRepCrc, RemoveCrcWithNullptr) { EXPECT_DEATH(RemoveCrcNode(nullptr), ""); } #endif // !NDEBUG && GTEST_HAS_DEATH_TEST +absl::crc_internal::CrcCordState MakeCrcCordState(uint32_t crc) { + crc_internal::CrcCordState state; + state.mutable_rep()->prefix_crc.push_back( + crc_internal::CrcCordState::PrefixCrc(42, crc32c_t{crc})); + return state; +} + TEST(CordRepCrc, NewDestroy) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); EXPECT_TRUE(crc->refcount.IsOne()); EXPECT_THAT(crc->child, Eq(rep)); - EXPECT_THAT(crc->crc, Eq(12345u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); EXPECT_TRUE(rep->refcount.IsOne()); CordRepCrc::Destroy(crc); } TEST(CordRepCrc, NewExistingCrcNotShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); - CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); + CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321)); EXPECT_THAT(new_crc, Eq(crc)); 
EXPECT_TRUE(new_crc->refcount.IsOne()); EXPECT_THAT(new_crc->child, Eq(rep)); - EXPECT_THAT(new_crc->crc, Eq(54321u)); + EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u})); EXPECT_TRUE(rep->refcount.IsOne()); CordRepCrc::Destroy(new_crc); } TEST(CordRepCrc, NewExistingCrcShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep::Ref(crc); - CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + CordRepCrc* new_crc = CordRepCrc::New(crc, MakeCrcCordState(54321)); EXPECT_THAT(new_crc, Ne(crc)); EXPECT_TRUE(new_crc->refcount.IsOne()); @@ -75,13 +80,23 @@ TEST(CordRepCrc, NewExistingCrcShared) { EXPECT_FALSE(rep->refcount.IsOne()); EXPECT_THAT(crc->child, Eq(rep)); EXPECT_THAT(new_crc->child, Eq(rep)); - EXPECT_THAT(crc->crc, Eq(12345u)); - EXPECT_THAT(new_crc->crc, Eq(54321u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); + EXPECT_THAT(new_crc->crc_cord_state.Checksum(), Eq(crc32c_t{54321u})); CordRep::Unref(crc); CordRep::Unref(new_crc); } +TEST(CordRepCrc, NewEmpty) { + CordRepCrc* crc = CordRepCrc::New(nullptr, MakeCrcCordState(12345)); + EXPECT_TRUE(crc->refcount.IsOne()); + EXPECT_THAT(crc->child, IsNull()); + EXPECT_THAT(crc->length, Eq(0u)); + EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u})); + EXPECT_TRUE(crc->refcount.IsOne()); + CordRepCrc::Destroy(crc); +} + TEST(CordRepCrc, RemoveCrcNotCrc) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); CordRep* nocrc = RemoveCrcNode(rep); @@ -91,7 +106,7 @@ TEST(CordRepCrc, RemoveCrcNotCrc) { TEST(CordRepCrc, RemoveCrcNotShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep* nocrc = RemoveCrcNode(crc); EXPECT_THAT(nocrc, Eq(rep)); EXPECT_TRUE(rep->refcount.IsOne()); @@ -100,7 +115,7 @@ 
TEST(CordRepCrc, RemoveCrcNotShared) { TEST(CordRepCrc, RemoveCrcShared) { CordRep* rep = cordrep_testing::MakeFlat("Hello world"); - CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* crc = CordRepCrc::New(rep, MakeCrcCordState(12345)); CordRep::Ref(crc); CordRep* nocrc = RemoveCrcNode(crc); EXPECT_THAT(nocrc, Eq(rep)); diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc index 6d6feb52..53d2f2ea 100644 --- a/absl/strings/internal/cordz_info_statistics_test.cc +++ b/absl/strings/internal/cordz_info_statistics_test.cc @@ -19,6 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/config.h" +#include "absl/crc/internal/crc_cord_state.h" #include "absl/strings/cord.h" #include "absl/strings/internal/cord_internal.h" #include "absl/strings/internal/cord_rep_btree.h" @@ -451,7 +452,8 @@ TEST(CordzInfoStatisticsTest, BtreeNodeShared) { TEST(CordzInfoStatisticsTest, Crc) { RefHelper ref; auto* left = Flat(1000); - auto* crc = ref.NeedsUnref(CordRepCrc::New(left, 12345)); + auto* crc = + ref.NeedsUnref(CordRepCrc::New(left, crc_internal::CrcCordState())); CordzStatistics expected; expected.size = left->length; diff --git a/absl/strings/internal/damerau_levenshtein_distance.cc b/absl/strings/internal/damerau_levenshtein_distance.cc index 7cc23acd..a084568f 100644 --- a/absl/strings/internal/damerau_levenshtein_distance.cc +++ b/absl/strings/internal/damerau_levenshtein_distance.cc @@ -31,8 +31,8 @@ namespace strings_internal { // detected. // When the distance is larger than cutoff, or one of the strings has more // than MAX_SIZE=100 characters, the code returns min(MAX_SIZE, cutoff) + 1. 
-size_t CappedDamerauLevenshteinDistance(absl::string_view s1, - absl::string_view s2, uint8_t cutoff) { +uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1, + absl::string_view s2, uint8_t cutoff) { const uint8_t MAX_SIZE = 100; const uint8_t _cutoff = std::min(MAX_SIZE, cutoff); const uint8_t cutoff_plus_1 = static_cast<uint8_t>(_cutoff + 1); @@ -42,7 +42,7 @@ size_t CappedDamerauLevenshteinDistance(absl::string_view s1, return cutoff_plus_1; if (s1.empty()) - return std::min(static_cast<size_t>(cutoff_plus_1), s2.size()); + return static_cast<uint8_t>(s2.size()); // Lower diagonal bound: y = x - lower_diag const uint8_t lower_diag = diff --git a/absl/strings/internal/damerau_levenshtein_distance.h b/absl/strings/internal/damerau_levenshtein_distance.h index b9bb6fe1..1a968425 100644 --- a/absl/strings/internal/damerau_levenshtein_distance.h +++ b/absl/strings/internal/damerau_levenshtein_distance.h @@ -25,8 +25,8 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { // Calculate DamerauLevenshtein distance between two strings. // When the distance is larger than cutoff, the code just returns cutoff + 1. 
-size_t CappedDamerauLevenshteinDistance(absl::string_view s1, - absl::string_view s2, uint8_t cutoff); +uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1, + absl::string_view s2, uint8_t cutoff); } // namespace strings_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/damerau_levenshtein_distance_test.cc b/absl/strings/internal/damerau_levenshtein_distance_test.cc index 45cb5bd9..a342b7db 100644 --- a/absl/strings/internal/damerau_levenshtein_distance_test.cc +++ b/absl/strings/internal/damerau_levenshtein_distance_test.cc @@ -24,76 +24,76 @@ namespace { using absl::strings_internal::CappedDamerauLevenshteinDistance; TEST(Distance, TestDistances) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 6), 0u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "b", 6), 1u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("ca", "abc", 6), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "ad", 6), 2u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "cadb", 6), 4u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "bdac", 6), 4u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 0), 0u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("", "", 0), 0u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 6), uint8_t{0}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "b", 6), uint8_t{1}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ca", "abc", 6), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "ad", 6), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "cadb", 6), uint8_t{4}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "bdac", 6), uint8_t{4}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 0), uint8_t{0}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("", "", 0), uint8_t{0}); // combinations for 3-character strings: // 1, 2, 3 removals, insertions or replacements and transpositions - 
+ EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", "abc", 6), uint8_t{0}); for (auto res : {"", "ca", "efg", "ea", "ce", "ceb", "eca", "cae", "cea", "bea"}) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), 3u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{3}); } for (auto res : {"a", "b", "c", "ba", "cb", "bca", "cab", "cba", "ace", "efc", "ebf", "aef", "ae", "be", "eb", "ec", "ecb", "bec", "bce", "cbe", "ace", "eac", "aeb", "bae", "eab", "eba"}) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), 2u); - EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), 2u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{2}); } for (auto res : {"ab", "ac", "bc", "acb", "bac", "ebc", "aec", "abe"}) { - EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), 1u); - EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), 1u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{1}); + EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{1}); } } TEST(Distance, TestCutoff) { // Returning cutoff + 1 if the value is larger than cutoff or string longer // than MAX_SIZE. 
- EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 1), 2u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("abcdefg", "a", 2), 3u); - EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "abcde", 2), 3u); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 1), uint8_t{2}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("abcdefg", "a", 2), uint8_t{3}); + EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "abcde", 2), uint8_t{3}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(102, 'a'), std::string(102, 'a'), 105), - 101u); + uint8_t{101}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(100, 'a'), 100), - 0u); + uint8_t{0}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(100, 'b'), 100), - 100u); + uint8_t{100}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(99, 'a'), 2), - 1u); + uint8_t{1}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(101, 'a'), 2), - 3u); + uint8_t{3}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'), std::string(101, 'a'), 2), - 3u); + uint8_t{3}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX + 1, 'a'), std::string(UINT8_MAX + 1, 'b'), UINT8_MAX), - 101u); + uint8_t{101}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'), std::string(UINT8_MAX - 1, 'b'), UINT8_MAX), - 101u); + uint8_t{101}); EXPECT_THAT( CappedDamerauLevenshteinDistance(std::string(UINT8_MAX, 'a'), std::string(UINT8_MAX, 'b'), UINT8_MAX), - 101u); + uint8_t{101}); EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'), std::string(UINT8_MAX - 1, 
'a'), UINT8_MAX), - 101u); + uint8_t{101}); } } // namespace diff --git a/absl/strings/internal/has_absl_stringify.h b/absl/strings/internal/has_absl_stringify.h new file mode 100644 index 00000000..55a08508 --- /dev/null +++ b/absl/strings/internal/has_absl_stringify.h @@ -0,0 +1,55 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ +#define ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ +#include <string> +#include <type_traits> +#include <utility> + +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace strings_internal { + +// This is an empty class not intended to be used. It exists so that +// `HasAbslStringify` can reference a universal class rather than needing to be +// copied for each new sink. +class UnimplementedSink { + public: + void Append(size_t count, char ch); + + void Append(string_view v); + + // Support `absl::Format(&sink, format, args...)`. 
+ friend void AbslFormatFlush(UnimplementedSink* sink, absl::string_view v); +}; + +template <typename T, typename = void> +struct HasAbslStringify : std::false_type {}; + +template <typename T> +struct HasAbslStringify< + T, std::enable_if_t<std::is_void<decltype(AbslStringify( + std::declval<strings_internal::UnimplementedSink&>(), + std::declval<const T&>()))>::value>> : std::true_type {}; + +} // namespace strings_internal + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_HAS_ABSL_STRINGIFY_H_ diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index 967fe9ca..018dd052 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -297,6 +297,37 @@ constexpr auto ConvertV(T) { } template <typename T> +bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::v) { + conv.set_conversion_char(FormatConversionCharInternal::g); + } + + return FormatConversionCharIsFloat(conv.conversion_char()) && + ConvertFloatImpl(v, conv, sink); +} + +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.is_basic()) { + sink->Append(v); + return true; + } + return sink->PutPaddedString(v, conv.width(), conv.precision(), + conv.has_left_flag()); +} + +} // namespace + +bool ConvertBoolArg(bool v, FormatSinkImpl *sink) { + if (v) { + sink->Append("true"); + } else { + sink->Append("false"); + } + return true; +} + +template <typename T> bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { using U = typename MakeUnsigned<T>::type; IntDigits as_digits; @@ -354,36 +385,37 @@ bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return ConvertIntImplInnerSlow(as_digits, conv, sink); } -template <typename T> -bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl 
*sink) { - if (conv.conversion_char() == FormatConversionCharInternal::v) { - conv.set_conversion_char(FormatConversionCharInternal::g); - } - - return FormatConversionCharIsFloat(conv.conversion_char()) && - ConvertFloatImpl(v, conv, sink); -} - -inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, - FormatSinkImpl *sink) { - if (conv.is_basic()) { - sink->Append(v); - return true; - } - return sink->PutPaddedString(v, conv.width(), conv.precision(), - conv.has_left_flag()); -} - -} // namespace - -bool ConvertBoolArg(bool v, FormatSinkImpl *sink) { - if (v) { - sink->Append("true"); - } else { - sink->Append("false"); - } - return true; -} +template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<signed char>(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned char>(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<short>(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned short>(unsigned short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned int>(unsigned int v, + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<long>(long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<long long>(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); +template bool ConvertIntArg<unsigned long long>(unsigned long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl *sink); // ==================== 
Strings ==================== StringConvertResult FormatConvertImpl(const std::string &v, diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index b3e4ff15..e4b16628 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -18,6 +18,7 @@ #include <string.h> #include <wchar.h> +#include <algorithm> #include <cstdio> #include <iomanip> #include <limits> @@ -25,10 +26,12 @@ #include <sstream> #include <string> #include <type_traits> +#include <utility> #include "absl/base/port.h" #include "absl/meta/type_traits.h" #include "absl/numeric/int128.h" +#include "absl/strings/internal/has_absl_stringify.h" #include "absl/strings/internal/str_format/extension.h" #include "absl/strings/string_view.h" @@ -50,6 +53,19 @@ struct ArgConvertResult { bool value; }; +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar, + FormatConversionCharSetInternal::v)>; +using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::kFloating, + FormatConversionCharSetInternal::v)>; +using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; + template <typename T, typename = void> struct HasUserDefinedConvert : std::false_type {}; @@ -67,6 +83,44 @@ void AbslFormatConvert(); void AbslStringify(); template <typename T> +bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +// Forward declarations of internal `ConvertIntArg` function template +// instantiations are here to avoid including the template body in the headers +// and instantiating it in large numbers of translation units. 
Explicit +// instantiations can be found in "absl/strings/internal/str_format/arg.cc" +extern template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<signed char>(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned char>(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<short>(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned short>( // NOLINT + unsigned short v, FormatConversionSpecImpl conv, // NOLINT + FormatSinkImpl* sink); +extern template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned int>(unsigned int v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<long>( // NOLINT + long v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // NOLINT +extern template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<long long>(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +extern template bool ConvertIntArg<unsigned long long>( // NOLINT + unsigned long long v, FormatConversionSpecImpl conv, // NOLINT + FormatSinkImpl* sink); + +template <typename T> auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, FormatSinkImpl* sink) -> decltype(AbslFormatConvert(v, @@ -82,10 +136,30 @@ auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, } template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> std::enable_if_t<std::is_enum<T>::value && + std::is_void<decltype(AbslStringify( + std::declval<FormatSink&>(), v))>::value, + IntegralConvertResult> { + if 
(conv.conversion_char() == FormatConversionCharInternal::v) { + using FormatSinkT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fs = sink->Wrap<FormatSinkT>(); + AbslStringify(fs, v); + return {true}; + } else { + return {ConvertIntArg( + static_cast<typename std::underlying_type<T>::type>(v), conv, sink)}; + } +} + +template <typename T> auto FormatConvertImpl(const T& v, FormatConversionSpecImpl, FormatSinkImpl* sink) - -> std::enable_if_t<std::is_void<decltype(AbslStringify( - std::declval<FormatSink&>(), v))>::value, + -> std::enable_if_t<!std::is_enum<T>::value && + std::is_void<decltype(AbslStringify( + std::declval<FormatSink&>(), v))>::value, ArgConvertResult<FormatConversionCharSetInternal::v>> { using FormatSinkT = absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; @@ -191,19 +265,6 @@ StringConvertResult FormatConvertImpl(const AbslCord& value, return {true}; } -using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::c, - FormatConversionCharSetInternal::kNumeric, - FormatConversionCharSetInternal::kStar, - FormatConversionCharSetInternal::v)>; -using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::kFloating, - FormatConversionCharSetInternal::v)>; -using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion( - FormatConversionCharSetInternal::c, - FormatConversionCharSetInternal::kNumeric, - FormatConversionCharSetInternal::kStar)>; - bool ConvertBoolArg(bool v, FormatSinkImpl* sink); // Floats. @@ -271,7 +332,8 @@ IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, // FormatArgImpl will use the underlying Convert functions instead. 
template <typename T> typename std::enable_if<std::is_enum<T>::value && - !HasUserDefinedConvert<T>::value, + !HasUserDefinedConvert<T>::value && + !strings_internal::HasAbslStringify<T>::value, IntegralConvertResult>::type FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); @@ -384,7 +446,8 @@ class FormatArgImpl { template <typename T, typename = void> struct DecayType { static constexpr bool kHasUserDefined = - str_format_internal::HasUserDefinedConvert<T>::value; + str_format_internal::HasUserDefinedConvert<T>::value || + strings_internal::HasAbslStringify<T>::value; using type = typename std::conditional< !kHasUserDefined && std::is_convertible<T, const char*>::value, const char*, @@ -396,6 +459,7 @@ class FormatArgImpl { struct DecayType<T, typename std::enable_if< !str_format_internal::HasUserDefinedConvert<T>::value && + !strings_internal::HasAbslStringify<T>::value && std::is_enum<T>::value>::type> { using type = typename std::underlying_type<T>::type; }; diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index aeb9d48d..eab6ab9d 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -15,8 +15,11 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#include <algorithm> + #include "absl/base/attributes.h" #include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" // Compile time check support for entry points. @@ -36,333 +39,56 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -constexpr bool AllOf() { return true; } - -template <typename... T> -constexpr bool AllOf(bool b, T... 
t) { - return b && AllOf(t...); -} - #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER -constexpr bool ContainsChar(const char* chars, char c) { - return *chars == c || (*chars && ContainsChar(chars + 1, c)); -} - -// A constexpr compatible list of Convs. -struct ConvList { - const FormatConversionCharSet* array; - int count; - - // We do the bound check here to avoid having to do it on the callers. - // Returning an empty FormatConversionCharSet has the same effect as - // short circuiting because it will never match any conversion. - constexpr FormatConversionCharSet operator[](int i) const { - return i < count ? array[i] : FormatConversionCharSet{}; - } - - constexpr ConvList without_front() const { - return count != 0 ? ConvList{array + 1, count - 1} : *this; - } -}; - -template <size_t count> -struct ConvListT { - // Make sure the array has size > 0. - FormatConversionCharSet list[count ? count : 1]; -}; - -constexpr char GetChar(string_view str, size_t index) { - return index < str.size() ? str[index] : char{}; -} - -constexpr string_view ConsumeFront(string_view str, size_t len = 1) { - return len <= str.size() ? string_view(str.data() + len, str.size() - len) - : string_view(); -} - -constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { - while (ContainsChar(chars, GetChar(format, 0))) { - format = ConsumeFront(format); - } - return format; -} - -constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } - -// Helper class for the ParseDigits function. -// It encapsulates the two return values we need there. -struct Integer { - string_view format; - int value; - - // If the next character is a '$', consume it. - // Otherwise, make `this` an invalid positional argument. 
- constexpr Integer ConsumePositionalDollar() const { - if (GetChar(format, 0) == '$') { - return Integer{ConsumeFront(format), value}; - } else { - return Integer{format, 0}; - } - } -}; - -constexpr Integer ParseDigits(string_view format) { - int value = 0; - while (IsDigit(GetChar(format, 0))) { - value = 10 * value + GetChar(format, 0) - '0'; - format = ConsumeFront(format); - } - - return Integer{format, value}; -} - -// Parse digits for a positional argument. -// The parsing also consumes the '$'. -constexpr Integer ParsePositional(string_view format) { - return ParseDigits(format).ConsumePositionalDollar(); -} - -// Parses a single conversion specifier. -// See ConvParser::Run() for post conditions. -class ConvParser { - constexpr ConvParser SetFormat(string_view format) const { - return ConvParser(format, args_, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetArgs(ConvList args) const { - return ConvParser(format_, args, error_, arg_position_, is_positional_); - } - - constexpr ConvParser SetError(bool error) const { - return ConvParser(format_, args_, error_ || error, arg_position_, - is_positional_); - } - - constexpr ConvParser SetArgPosition(int arg_position) const { - return ConvParser(format_, args_, error_, arg_position, is_positional_); - } - - // Consumes the next arg and verifies that it matches `conv`. - // `error_` is set if there is no next arg or if it doesn't match `conv`. - constexpr ConvParser ConsumeNextArg(char conv) const { - return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); - } - - // Verify that positional argument `i.value` matches `conv`. - // `error_` is set if `i.value` is not a valid argument or if it doesn't - // match. - constexpr ConvParser VerifyPositional(Integer i, char conv) const { - return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); - } - - // Parse the position of the arg and store it in `arg_position_`. 
- constexpr ConvParser ParseArgPosition(Integer arg) const { - return SetFormat(arg.format).SetArgPosition(arg.value); - } - - // Consume the flags. - constexpr ConvParser ParseFlags() const { - return SetFormat(ConsumeAnyOf(format_, "-+ #0")); - } - - // Consume the width. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParseWidth() const { - char first_char = GetChar(format_, 0); - - if (IsDigit(first_char)) { - return SetFormat(ParseDigits(format_).format); - } else if (first_char == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_)), '*'); - } else { - return SetFormat(ConsumeFront(format_)).ConsumeNextArg('*'); - } - } else { - return *this; +template <FormatConversionCharSet... C> +constexpr bool ValidFormatImpl(string_view format) { + int next_arg = 0; + const char* p = format.data(); + const char* const end = p + format.size(); + constexpr FormatConversionCharSet + kAllowedConvs[(std::max)(sizeof...(C), size_t{1})] = {C...}; + bool used[(std::max)(sizeof...(C), size_t{1})]{}; + constexpr int kNumArgs = sizeof...(C); + while (p != end) { + while (p != end && *p != '%') ++p; + if (p == end) { + break; } - } - - // Consume the precision. - // If it is '*', we verify that it matches `args_`. `error_` is set if it - // doesn't match. - constexpr ConvParser ParsePrecision() const { - if (GetChar(format_, 0) != '.') { - return *this; - } else if (GetChar(format_, 1) == '*') { - if (is_positional_) { - return VerifyPositional(ParsePositional(ConsumeFront(format_, 2)), '*'); - } else { - return SetFormat(ConsumeFront(format_, 2)).ConsumeNextArg('*'); - } - } else { - return SetFormat(ParseDigits(ConsumeFront(format_)).format); + if (p + 1 >= end) return false; + if (p[1] == '%') { + // %% + p += 2; + continue; } - } - - // Consume the length characters. 
- constexpr ConvParser ParseLength() const { - return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); - } - - // Consume the conversion character and verify that it matches `args_`. - // `error_` is set if it doesn't match. - constexpr ConvParser ParseConversion() const { - char first_char = GetChar(format_, 0); - if (first_char == 'v' && *(format_.data() - 1) != '%') { - return SetError(true); + UnboundConversion conv(absl::kConstInit); + p = ConsumeUnboundConversion(p + 1, end, &conv, &next_arg); + if (p == nullptr) return false; + if (conv.arg_position <= 0 || conv.arg_position > kNumArgs) { + return false; } - - if (is_positional_) { - return VerifyPositional({ConsumeFront(format_), arg_position_}, - first_char); - } else { - return ConsumeNextArg(first_char).SetFormat(ConsumeFront(format_)); + if (!Contains(kAllowedConvs[conv.arg_position - 1], conv.conv)) { + return false; } - } - - constexpr ConvParser(string_view format, ConvList args, bool error, - int arg_position, bool is_positional) - : format_(format), - args_(args), - error_(error), - arg_position_(arg_position), - is_positional_(is_positional) {} - - public: - constexpr ConvParser(string_view format, ConvList args, bool is_positional) - : format_(format), - args_(args), - error_(false), - arg_position_(0), - is_positional_(is_positional) {} - - // Consume the whole conversion specifier. - // `format()` will be set to the character after the conversion character. - // `error()` will be set if any of the arguments do not match. 
- constexpr ConvParser Run() const { - ConvParser parser = *this; - - if (is_positional_) { - parser = ParseArgPosition(ParsePositional(format_)); - } - - return parser.ParseFlags() - .ParseWidth() - .ParsePrecision() - .ParseLength() - .ParseConversion(); - } - - constexpr string_view format() const { return format_; } - constexpr ConvList args() const { return args_; } - constexpr bool error() const { return error_; } - constexpr bool is_positional() const { return is_positional_; } - - private: - string_view format_; - // Current list of arguments. If we are not in positional mode we will consume - // from the front. - ConvList args_; - bool error_; - // Holds the argument position of the conversion character, if we are in - // positional mode. Otherwise, it is unspecified. - int arg_position_; - // Whether we are in positional mode. - // It changes the behavior of '*' and where to find the converted argument. - bool is_positional_; -}; - -// Parses a whole format expression. -// See FormatParser::Run(). -class FormatParser { - static constexpr bool FoundPercent(string_view format) { - return format.empty() || - (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); - } - - // We use an inner function to increase the recursion limit. - // The inner function consumes up to `limit` characters on every run. - // This increases the limit from 512 to ~512*limit. 
- static constexpr string_view ConsumeNonPercentInner(string_view format) { - int limit = 20; - while (!FoundPercent(format) && limit != 0) { - size_t len = 0; - - if (GetChar(format, 0) == '%' && GetChar(format, 1) == '%') { - len = 2; - } else { - len = 1; + used[conv.arg_position - 1] = true; + for (auto extra : {conv.width, conv.precision}) { + if (extra.is_from_arg()) { + int pos = extra.get_from_arg(); + if (pos <= 0 || pos > kNumArgs) return false; + used[pos - 1] = true; + if (!Contains(kAllowedConvs[pos - 1], '*')) { + return false; + } } - - format = ConsumeFront(format, len); - --limit; } - - return format; } - - // Consume characters until the next conversion spec %. - // It skips %%. - static constexpr string_view ConsumeNonPercent(string_view format) { - while (!FoundPercent(format)) { - format = ConsumeNonPercentInner(format); + if (sizeof...(C) != 0) { + for (bool b : used) { + if (!b) return false; } - - return format; - } - - static constexpr bool IsPositional(string_view format) { - while (IsDigit(GetChar(format, 0))) { - format = ConsumeFront(format); - } - - return GetChar(format, 0) == '$'; } - - constexpr bool RunImpl(bool is_positional) const { - // In non-positional mode we require all arguments to be consumed. - // In positional mode just reaching the end of the format without errors is - // enough. - return (format_.empty() && (is_positional || args_.count == 0)) || - (!format_.empty() && - ValidateArg( - ConvParser(ConsumeFront(format_), args_, is_positional).Run())); - } - - constexpr bool ValidateArg(ConvParser conv) const { - return !conv.error() && FormatParser(conv.format(), conv.args()) - .RunImpl(conv.is_positional()); - } - - public: - constexpr FormatParser(string_view format, ConvList args) - : format_(ConsumeNonPercent(format)), args_(args) {} - - // Runs the parser for `format` and `args`. - // It verifies that the format is valid and that all conversion specifiers - // match the arguments passed. 
- // In non-positional mode it also verfies that all arguments are consumed. - constexpr bool Run() const { - return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); - } - - private: - string_view format_; - // Current list of arguments. - // If we are not in positional mode we will consume from the front and will - // have to be empty in the end. - ConvList args_; -}; - -template <FormatConversionCharSet... C> -constexpr bool ValidFormatImpl(string_view format) { - return FormatParser(format, - {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) - .Run(); + return true; } #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc index 680517f7..a86bed38 100644 --- a/absl/strings/internal/str_format/checker_test.cc +++ b/absl/strings/internal/str_format/checker_test.cc @@ -93,6 +93,7 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<void (*)(), volatile int*>("%p %p"), // ValidFormat<string_view, const char*, double, void*>( "string_view=%s const char*=%s double=%f void*=%p)"), + ValidFormat<int>("%v"), // ValidFormat<int>("%% %1$d"), // ValidFormat<int>("%1$ld"), // @@ -109,7 +110,9 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int, double>("%2$.*1$f"), // ValidFormat<void*, string_view, const char*, double>( "string_view=%2$s const char*=%3$s double=%4$f void*=%1$p " - "repeat=%3$s)")}; + "repeat=%3$s)"), + ValidFormat<std::string>("%1$v"), + }; for (Case c : trues) { EXPECT_TRUE(c.result) << c.format; @@ -130,6 +133,8 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int>("%*d"), // ValidFormat<std::string>("%p"), // ValidFormat<int (*)(int)>("%d"), // + ValidFormat<int>("%1v"), // + ValidFormat<int>("%.1v"), // ValidFormat<>("%3$d"), // ValidFormat<>("%1$r"), // @@ -138,13 +143,14 @@ TEST(StrFormatChecker, ValidFormat) { ValidFormat<int>("%1$*2$1d"), // ValidFormat<int>("%1$1-d"), // ValidFormat<std::string, int>("%2$*1$s"), // - 
ValidFormat<std::string>("%1$p"), + ValidFormat<std::string>("%1$p"), // + ValidFormat<int>("%1$*2$v"), // ValidFormat<int, int>("%d %2$d"), // }; for (Case c : falses) { - EXPECT_FALSE(c.result) << c.format; + EXPECT_FALSE(c.result) << "format<" << c.format << ">"; } } diff --git a/absl/strings/internal/str_format/constexpr_parser.h b/absl/strings/internal/str_format/constexpr_parser.h new file mode 100644 index 00000000..3dc1776b --- /dev/null +++ b/absl/strings/internal/str_format/constexpr_parser.h @@ -0,0 +1,351 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ + +#include <cassert> +#include <cstdint> +#include <limits> + +#include "absl/base/const_init.h" +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; + +// The analyzed properties of a single specified conversion. +struct UnboundConversion { + // This is a user defined default constructor on purpose to skip the + // initialization of parts of the object that are not necessary. + UnboundConversion() {} // NOLINT + + // This constructor is provided for the static checker. We don't want to do + // the unnecessary initialization in the normal case. 
+ explicit constexpr UnboundConversion(absl::ConstInitType) + : arg_position{}, width{}, precision{} {} + + class InputValue { + public: + constexpr void set_value(int value) { + assert(value >= 0); + value_ = value; + } + constexpr int value() const { return value_; } + + // Marks the value as "from arg". aka the '*' format. + // Requires `value >= 1`. + // When set, is_from_arg() return true and get_from_arg() returns the + // original value. + // `value()`'s return value is unspecified in this state. + constexpr void set_from_arg(int value) { + assert(value > 0); + value_ = -value - 1; + } + constexpr bool is_from_arg() const { return value_ < -1; } + constexpr int get_from_arg() const { + assert(is_from_arg()); + return -value_ - 1; + } + + private: + int value_ = -1; + }; + + // No need to initialize. It will always be set in the parser. + int arg_position; + + InputValue width; + InputValue precision; + + Flags flags = Flags::kBasic; + LengthMod length_mod = LengthMod::none; + FormatConversionChar conv = FormatConversionCharInternal::kNone; +}; + +// Helper tag class for the table below. +// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and +// conversions. 
+class ConvTag { + public: + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<uint8_t>(conversion_char)) {} + constexpr ConvTag(LengthMod length_mod) // NOLINT + : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} + constexpr ConvTag(Flags flags) // NOLINT + : tag_(0xc0 | static_cast<uint8_t>(flags)) {} + constexpr ConvTag() : tag_(0xFF) {} + + constexpr bool is_conv() const { return (tag_ & 0x80) == 0; } + constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; } + constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } + + constexpr FormatConversionChar as_conv() const { + assert(is_conv()); + assert(!is_length()); + assert(!is_flags()); + return static_cast<FormatConversionChar>(tag_); + } + constexpr LengthMod as_length() const { + assert(!is_conv()); + assert(is_length()); + assert(!is_flags()); + return static_cast<LengthMod>(tag_ & 0x3F); + } + constexpr Flags as_flags() const { + assert(!is_conv()); + assert(!is_length()); + assert(is_flags()); + return static_cast<Flags>(tag_ & 0x1F); + } + + private: + uint8_t tag_; +}; + +struct ConvTagHolder { + using CC = FormatConversionCharInternal; + using LM = LengthMod; + + // Abbreviations to fit in the table below. + static constexpr auto kFSign = Flags::kSignCol; + static constexpr auto kFAlt = Flags::kAlt; + static constexpr auto kFPos = Flags::kShowPos; + static constexpr auto kFLeft = Flags::kLeft; + static constexpr auto kFZero = Flags::kZero; + + static constexpr ConvTag value[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&' + {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./ + kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567 + {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? 
+ {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff + }; +}; + +// Keep a single table for all the conversion chars and length modifiers. +constexpr ConvTag GetTagForChar(char c) { + return ConvTagHolder::value[static_cast<unsigned char>(c)]; +} + +constexpr bool CheckFastPathSetting(const UnboundConversion& conv) { + bool width_precision_needed = + conv.width.value() >= 0 || conv.precision.value() >= 0; + if (width_precision_needed && conv.flags == Flags::kBasic) { +#if defined(__clang__) + // Some compilers complain about this in constexpr even when not executed, + // so only enable the error dump in clang. + fprintf(stderr, + "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " + "width=%d precision=%d\n", + conv.flags == Flags::kBasic ? 1 : 0, + FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, + FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, + FlagsContains(conv.flags, Flags::kSignCol) ? 
1 : 0, + FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, + FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), + conv.precision.value()); +#endif // defined(__clang__) + return false; + } + return true; +} + +constexpr int ParseDigits(char& c, const char*& pos, const char* const end) { + int digits = c - '0'; + // We do not want to overflow `digits` so we consume at most digits10 + // digits. If there are more digits the parsing will fail later on when the + // digit doesn't match the expected characters. + int num_digits = std::numeric_limits<int>::digits10; + for (;;) { + if (ABSL_PREDICT_FALSE(pos == end)) break; + c = *pos++; + if ('0' > c || c > '9') break; + --num_digits; + if (ABSL_PREDICT_FALSE(!num_digits)) break; + digits = 10 * digits + c - '0'; + } + return digits; +} + +template <bool is_positional> +constexpr const char* ConsumeConversion(const char* pos, const char* const end, + UnboundConversion* conv, + int* next_arg) { + const char* const original_pos = pos; + char c = 0; + // Read the next char into `c` and update `pos`. Returns false if there are + // no more chars to read. +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ + } while (0) + + if (is_positional) { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->arg_position = ParseDigits(c, pos, end); + assert(conv->arg_position > 0); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + } + + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + + // We should start with the basic flag on. + assert(conv->flags == Flags::kBasic); + + // Any non alpha character makes this conversion not basic. + // This includes flags (-+ #0), width (1-9, *) or precision (.). + // All conversion characters and length modifiers are alpha characters. 
+ if (c < 'A') { + while (c <= '0') { + auto tag = GetTagForChar(c); + if (tag.is_flags()) { + conv->flags = conv->flags | tag.as_flags(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + break; + } + } + + if (c <= '9') { + if (c >= '0') { + int maybe_width = ParseDigits(c, pos, end); + if (!is_positional && c == '$') { + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; + // Positional conversion. + *next_arg = -1; + return ConsumeConversion<true>(original_pos, end, conv, next_arg); + } + conv->flags = conv->flags | Flags::kNonBasic; + conv->width.set_value(maybe_width); + } else if (c == '*') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->width.set_from_arg(ParseDigits(c, pos, end)); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->width.set_from_arg(++*next_arg); + } + } + } + + if (c == '.') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if ('0' <= c && c <= '9') { + conv->precision.set_value(ParseDigits(c, pos, end)); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->precision.set_from_arg(ParseDigits(c, pos, end)); + if (c != '$') return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->precision.set_from_arg(++*next_arg); + } + } else { + conv->precision.set_value(0); + } + } + } + + auto tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) { + return nullptr; + } + + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; + + // It is a length modifier. 
+ using str_format_internal::LengthMod; + LengthMod length_mod = tag.as_length(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (c == 'h' && length_mod == LengthMod::h) { + conv->length_mod = LengthMod::hh; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else if (c == 'l' && length_mod == LengthMod::l) { + conv->length_mod = LengthMod::ll; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->length_mod = length_mod; + } + tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; + } + + assert(CheckFastPathSetting(*conv)); + (void)(&CheckFastPathSetting); + + conv->conv = tag.as_conv(); + if (!is_positional) conv->arg_position = ++*next_arg; + return pos; +} + +// Consume conversion spec prefix (not including '%') of [p, end) if valid. +// Examples of valid specs would be e.g.: "s", "d", "-12.6f". +// If valid, it returns the first character following the conversion spec, +// and the spec part is broken down and returned in 'conv'. +// If invalid, returns nullptr. +constexpr const char* ConsumeUnboundConversion(const char* p, const char* end, + UnboundConversion* conv, + int* next_arg) { + if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); + return ConsumeConversion<false>(p, end, conv, next_arg); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_ diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index 13731ee2..5aaab698 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -31,211 +31,14 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = FormatConversionCharInternal; -using LM = LengthMod; +// Define the array for non-constexpr uses. +constexpr ConvTag ConvTagHolder::value[256]; -// Abbreviations to fit in the table below. 
-constexpr auto f_sign = Flags::kSignCol; -constexpr auto f_alt = Flags::kAlt; -constexpr auto f_pos = Flags::kShowPos; -constexpr auto f_left = Flags::kLeft; -constexpr auto f_zero = Flags::kZero; - -ABSL_CONST_INIT const ConvTag kTags[256] = { - {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 - {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f - {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 - {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f - f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&' - {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./ - f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567 - {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? - {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG - {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO - {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW - CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ - {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno - CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw - CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! 
- {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 - {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f - {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 - {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f - {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 - {}, {}, {}, {}, {}, {}, {}, {}, // a8-af - {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 - {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf - {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 - {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf - {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 - {}, {}, {}, {}, {}, {}, {}, {}, // d8-df - {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 - {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef - {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 - {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff -}; - -namespace { - -bool CheckFastPathSetting(const UnboundConversion& conv) { - bool width_precision_needed = - conv.width.value() >= 0 || conv.precision.value() >= 0; - if (width_precision_needed && conv.flags == Flags::kBasic) { - fprintf(stderr, - "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " - "width=%d precision=%d\n", - conv.flags == Flags::kBasic ? 1 : 0, - FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, - FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, - FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, - FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, - FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), - conv.precision.value()); - return false; - } - return true; -} - -template <bool is_positional> -const char *ConsumeConversion(const char *pos, const char *const end, - UnboundConversion *conv, int *next_arg) { - const char* const original_pos = pos; - char c; - // Read the next char into `c` and update `pos`. Returns false if there are - // no more chars to read. 
-#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ - do { \ - if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ - c = *pos++; \ - } while (0) - - const auto parse_digits = [&] { - int digits = c - '0'; - // We do not want to overflow `digits` so we consume at most digits10 - // digits. If there are more digits the parsing will fail later on when the - // digit doesn't match the expected characters. - int num_digits = std::numeric_limits<int>::digits10; - for (;;) { - if (ABSL_PREDICT_FALSE(pos == end)) break; - c = *pos++; - if (!std::isdigit(c)) break; - --num_digits; - if (ABSL_PREDICT_FALSE(!num_digits)) break; - digits = 10 * digits + c - '0'; - } - return digits; - }; - - if (is_positional) { - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->arg_position = parse_digits(); - assert(conv->arg_position > 0); - if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; - } - - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - - // We should start with the basic flag on. - assert(conv->flags == Flags::kBasic); - - // Any non alpha character makes this conversion not basic. - // This includes flags (-+ #0), width (1-9, *) or precision (.). - // All conversion characters and length modifiers are alpha characters. - if (c < 'A') { - while (c <= '0') { - auto tag = GetTagForChar(c); - if (tag.is_flags()) { - conv->flags = conv->flags | tag.as_flags(); - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - break; - } - } - - if (c <= '9') { - if (c >= '0') { - int maybe_width = parse_digits(); - if (!is_positional && c == '$') { - if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; - // Positional conversion. 
- *next_arg = -1; - return ConsumeConversion<true>(original_pos, end, conv, next_arg); - } - conv->flags = conv->flags | Flags::kNonBasic; - conv->width.set_value(maybe_width); - } else if (c == '*') { - conv->flags = conv->flags | Flags::kNonBasic; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->width.set_from_arg(parse_digits()); - if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->width.set_from_arg(++*next_arg); - } - } - } - - if (c == '.') { - conv->flags = conv->flags | Flags::kNonBasic; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (std::isdigit(c)) { - conv->precision.set_value(parse_digits()); - } else if (c == '*') { - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; - conv->precision.set_from_arg(parse_digits()); - if (c != '$') return nullptr; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->precision.set_from_arg(++*next_arg); - } - } else { - conv->precision.set_value(0); - } - } - } - - auto tag = GetTagForChar(c); - - if (ABSL_PREDICT_FALSE(c == 'v' && (pos - original_pos) != 1)) return nullptr; - - if (ABSL_PREDICT_FALSE(!tag.is_conv())) { - if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; - - // It is a length modifier. 
- using str_format_internal::LengthMod; - LengthMod length_mod = tag.as_length(); - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (c == 'h' && length_mod == LengthMod::h) { - conv->length_mod = LengthMod::hh; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else if (c == 'l' && length_mod == LengthMod::l) { - conv->length_mod = LengthMod::ll; - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else { - conv->length_mod = length_mod; - } - tag = GetTagForChar(c); - - if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr; - if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; - } - - assert(CheckFastPathSetting(*conv)); - (void)(&CheckFastPathSetting); - - conv->conv = tag.as_conv(); - if (!is_positional) conv->arg_position = ++*next_arg; - return pos; +ABSL_ATTRIBUTE_NOINLINE const char* ConsumeUnboundConversionNoInline( + const char* p, const char* end, UnboundConversion* conv, int* next_arg) { + return ConsumeUnboundConversion(p, end, conv, next_arg); } -} // namespace - std::string LengthModToString(LengthMod v) { switch (v) { case LengthMod::h: @@ -262,12 +65,6 @@ std::string LengthModToString(LengthMod v) { return ""; } -const char *ConsumeUnboundConversion(const char *p, const char *end, - UnboundConversion *conv, int *next_arg) { - if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); - return ConsumeConversion<false>(p, end, conv, next_arg); -} - struct ParsedFormatBase::ParsedFormatConsumer { explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat) : parsed(parsedformat), data_pos(parsedformat->data_.get()) {} diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index a81bac83..35b6d49c 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -29,111 +29,18 @@ #include <vector> #include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/constexpr_parser.h" #include "absl/strings/internal/str_format/extension.h" 
namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; - std::string LengthModToString(LengthMod v); -// The analyzed properties of a single specified conversion. -struct UnboundConversion { - UnboundConversion() {} - - class InputValue { - public: - void set_value(int value) { - assert(value >= 0); - value_ = value; - } - int value() const { return value_; } - - // Marks the value as "from arg". aka the '*' format. - // Requires `value >= 1`. - // When set, is_from_arg() return true and get_from_arg() returns the - // original value. - // `value()`'s return value is unspecfied in this state. - void set_from_arg(int value) { - assert(value > 0); - value_ = -value - 1; - } - bool is_from_arg() const { return value_ < -1; } - int get_from_arg() const { - assert(is_from_arg()); - return -value_ - 1; - } - - private: - int value_ = -1; - }; - - // No need to initialize. It will always be set in the parser. - int arg_position; - - InputValue width; - InputValue precision; - - Flags flags = Flags::kBasic; - LengthMod length_mod = LengthMod::none; - FormatConversionChar conv = FormatConversionCharInternal::kNone; -}; - -// Consume conversion spec prefix (not including '%') of [p, end) if valid. -// Examples of valid specs would be e.g.: "s", "d", "-12.6f". -// If valid, it returns the first character following the conversion spec, -// and the spec part is broken down and returned in 'conv'. -// If invalid, returns nullptr. -const char* ConsumeUnboundConversion(const char* p, const char* end, - UnboundConversion* conv, int* next_arg); - -// Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and -// conversions. 
-class ConvTag { - public: - constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT - : tag_(static_cast<uint8_t>(conversion_char)) {} - constexpr ConvTag(LengthMod length_mod) // NOLINT - : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} - constexpr ConvTag(Flags flags) // NOLINT - : tag_(0xc0 | static_cast<uint8_t>(flags)) {} - constexpr ConvTag() : tag_(0xFF) {} - - bool is_conv() const { return (tag_ & 0x80) == 0; } - bool is_length() const { return (tag_ & 0xC0) == 0x80; } - bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } - - FormatConversionChar as_conv() const { - assert(is_conv()); - assert(!is_length()); - assert(!is_flags()); - return static_cast<FormatConversionChar>(tag_); - } - LengthMod as_length() const { - assert(!is_conv()); - assert(is_length()); - assert(!is_flags()); - return static_cast<LengthMod>(tag_ & 0x3F); - } - Flags as_flags() const { - assert(!is_conv()); - assert(!is_length()); - assert(is_flags()); - return static_cast<Flags>(tag_ & 0x1F); - } - - private: - uint8_t tag_; -}; - -extern const ConvTag kTags[256]; -// Keep a single table for all the conversion chars and length modifiers. -inline ConvTag GetTagForChar(char c) { - return kTags[static_cast<unsigned char>(c)]; -} +const char* ConsumeUnboundConversionNoInline(const char* p, const char* end, + UnboundConversion* conv, + int* next_arg); // Parse the format string provided in 'src' and pass the identified items into // 'consumer'. 
@@ -187,7 +94,7 @@ bool ParseFormatString(string_view src, Consumer consumer) { } } else if (percent[1] != '%') { UnboundConversion conv; - p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); + p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg); if (ABSL_PREDICT_FALSE(p == nullptr)) return false; if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( conv, string_view(percent + 1, diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index c3e825fe..021f6a87 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -117,6 +117,7 @@ TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) { {__LINE__, "dzz", "d", "zz"}, // length mod as suffix {__LINE__, "3v", "", "3v"}, // 'v' cannot have modifiers {__LINE__, "hv", "", "hv"}, // 'v' cannot have modifiers + {__LINE__, "1$v", "1$v", ""}, // 'v' can have use posix syntax {__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed. {__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags diff --git a/absl/strings/internal/stringify_sink.cc b/absl/strings/internal/stringify_sink.cc new file mode 100644 index 00000000..7c6995ab --- /dev/null +++ b/absl/strings/internal/stringify_sink.cc @@ -0,0 +1,28 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/stringify_sink.h" +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +void StringifySink::Append(size_t count, char ch) { buffer_.append(count, ch); } + +void StringifySink::Append(string_view v) { + buffer_.append(v.data(), v.size()); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/stringify_sink.h b/absl/strings/internal/stringify_sink.h index a83f70e4..fc3747bb 100644 --- a/absl/strings/internal/stringify_sink.h +++ b/absl/strings/internal/stringify_sink.h @@ -31,8 +31,6 @@ class StringifySink { void Append(string_view v); - bool PutPaddedString(string_view v, int width, int precision, bool left); - // Support `absl::Format(&sink, format, args...)`. friend void AbslFormatFlush(StringifySink* sink, absl::string_view v) { sink->Append(v); @@ -51,15 +49,6 @@ string_view ExtractStringification(StringifySink& sink, const T& v) { return sink.buffer_; } -template <typename T, typename = void> -struct HasAbslStringify : std::false_type {}; - -template <typename T> -struct HasAbslStringify<T, std::enable_if_t<std::is_void<decltype(AbslStringify( - std::declval<strings_internal::StringifySink&>(), - std::declval<const T&>()))>::value>> - : std::true_type {}; - } // namespace strings_internal ABSL_NAMESPACE_END |