diff options
author | Matt Kulukundis <kfm@google.com> | 2024-01-29 12:14:16 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-01-29 12:15:25 -0800 |
commit | 9a79278a9793574cad2773b3445a887f949bc705 (patch) | |
tree | 3d7d0347715d51901113c1b3a62bc350b2c103e9 | |
parent | 42624b3d9d56ae6fd5c48b28715044667942b9d8 (diff) | |
download | abseil-9a79278a9793574cad2773b3445a887f949bc705.tar.gz abseil-9a79278a9793574cad2773b3445a887f949bc705.tar.bz2 abseil-9a79278a9793574cad2773b3445a887f949bc705.zip |
Fix a corner case in SpyHashState for exact boundaries.
AbslHash allows for piecewise chunks to be streamed incrementally into hash states and requires them to hash identically to one giant stream. The exact size window for this is an internal details `PiecewiseChunkSize`. There was an off by one error in this code. Add tests and fix it.
PiperOrigin-RevId: 602463183
Change-Id: I159bbb5e7e745f55b2fe6eaf0d2735bd0a08aca9
-rw-r--r-- | absl/hash/internal/spy_hash_state.h | 24 | ||||
-rw-r--r-- | absl/strings/BUILD.bazel | 1 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 1 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 21 |
4 files changed, 35 insertions, 12 deletions
diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h index 09728266..357c301c 100644 --- a/absl/hash/internal/spy_hash_state.h +++ b/absl/hash/internal/spy_hash_state.h @@ -149,20 +149,20 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> { const unsigned char* begin, size_t size) { const size_t large_chunk_stride = PiecewiseChunkSize(); - if (size > large_chunk_stride) { - // Combining a large contiguous buffer must have the same effect as - // doing it piecewise by the stride length, followed by the (possibly - // empty) remainder. - while (size >= large_chunk_stride) { - hash_state = SpyHashStateImpl::combine_contiguous( - std::move(hash_state), begin, large_chunk_stride); - begin += large_chunk_stride; - size -= large_chunk_stride; - } + // Combining a large contiguous buffer must have the same effect as + // doing it piecewise by the stride length, followed by the (possibly + // empty) remainder. + while (size > large_chunk_stride) { + hash_state = SpyHashStateImpl::combine_contiguous( + std::move(hash_state), begin, large_chunk_stride); + begin += large_chunk_stride; + size -= large_chunk_stride; } - hash_state.hash_representation_.emplace_back( - reinterpret_cast<const char*>(begin), size); + if (size > 0) { + hash_state.hash_representation_.emplace_back( + reinterpret_cast<const char*>(begin), size); + } return hash_state; } diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 8e8371b3..ed2ab04f 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -918,6 +918,7 @@ cc_test( "//absl/container:fixed_array", "//absl/functional:function_ref", "//absl/hash", + "//absl/hash:hash_testing", "//absl/log", "//absl/log:check", "//absl/random", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 1b245362..ce9c5e46 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -1072,6 +1072,7 @@ absl_cc_test( absl::fixed_array absl::function_ref absl::hash + absl::hash_testing absl::log absl::optional absl::random_random diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index f1a5f39c..69b1a69e 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -42,6 +42,7 @@ #include "absl/container/fixed_array.h" #include "absl/functional/function_ref.h" #include "absl/hash/hash.h" +#include "absl/hash/hash_testing.h" #include "absl/log/check.h" #include "absl/log/log.h" #include "absl/random/random.h" @@ -2013,6 +2014,26 @@ TEST(CordTest, CordMemoryUsageBTree) { rep2_size); } +TEST(CordTest, TestHashFragmentation) { + // Make sure we hit these boundary cases precisely. + EXPECT_EQ(1024, absl::hash_internal::PiecewiseChunkSize()); + EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly({ + absl::Cord(), + absl::MakeFragmentedCord({std::string(600, 'a'), std::string(600, 'a')}), + absl::MakeFragmentedCord({std::string(1200, 'a')}), + absl::MakeFragmentedCord({std::string(900, 'b'), std::string(900, 'b')}), + absl::MakeFragmentedCord({std::string(1800, 'b')}), + absl::MakeFragmentedCord( + {std::string(2000, 'c'), std::string(2000, 'c')}), + absl::MakeFragmentedCord({std::string(4000, 'c')}), + absl::MakeFragmentedCord({std::string(1024, 'd')}), + absl::MakeFragmentedCord({std::string(1023, 'd'), "d"}), + absl::MakeFragmentedCord({std::string(1025, 'e')}), + absl::MakeFragmentedCord({std::string(1024, 'e'), "e"}), + absl::MakeFragmentedCord({std::string(1023, 'e'), "e", "e"}), + })); +} + // Regtest for a change that had to be rolled back because it expanded out // of the InlineRep too soon, which was observable through MemoryUsage(). TEST_P(CordTest, CordMemoryUsageInlineRep) { |