aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Kulukundis <kfm@google.com>2024-01-29 12:14:16 -0800
committerCopybara-Service <copybara-worker@google.com>2024-01-29 12:15:25 -0800
commit9a79278a9793574cad2773b3445a887f949bc705 (patch)
tree3d7d0347715d51901113c1b3a62bc350b2c103e9
parent42624b3d9d56ae6fd5c48b28715044667942b9d8 (diff)
downloadabseil-9a79278a9793574cad2773b3445a887f949bc705.tar.gz
abseil-9a79278a9793574cad2773b3445a887f949bc705.tar.bz2
abseil-9a79278a9793574cad2773b3445a887f949bc705.zip
Fix a corner case in SpyHashState for exact boundaries.
AbslHash allows for piecewise chunks to be streamed incrementally into hash states and requires them to hash identically to one giant stream. The exact size window for this is an internal details `PiecewiseChunkSize`. There was an off by one error in this code. Add tests and fix it. PiperOrigin-RevId: 602463183 Change-Id: I159bbb5e7e745f55b2fe6eaf0d2735bd0a08aca9
-rw-r--r--absl/hash/internal/spy_hash_state.h24
-rw-r--r--absl/strings/BUILD.bazel1
-rw-r--r--absl/strings/CMakeLists.txt1
-rw-r--r--absl/strings/cord_test.cc21
4 files changed, 35 insertions, 12 deletions
diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h
index 09728266..357c301c 100644
--- a/absl/hash/internal/spy_hash_state.h
+++ b/absl/hash/internal/spy_hash_state.h
@@ -149,20 +149,20 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
const unsigned char* begin,
size_t size) {
const size_t large_chunk_stride = PiecewiseChunkSize();
- if (size > large_chunk_stride) {
- // Combining a large contiguous buffer must have the same effect as
- // doing it piecewise by the stride length, followed by the (possibly
- // empty) remainder.
- while (size >= large_chunk_stride) {
- hash_state = SpyHashStateImpl::combine_contiguous(
- std::move(hash_state), begin, large_chunk_stride);
- begin += large_chunk_stride;
- size -= large_chunk_stride;
- }
+ // Combining a large contiguous buffer must have the same effect as
+ // doing it piecewise by the stride length, followed by the (possibly
+ // empty) remainder.
+ while (size > large_chunk_stride) {
+ hash_state = SpyHashStateImpl::combine_contiguous(
+ std::move(hash_state), begin, large_chunk_stride);
+ begin += large_chunk_stride;
+ size -= large_chunk_stride;
}
- hash_state.hash_representation_.emplace_back(
- reinterpret_cast<const char*>(begin), size);
+ if (size > 0) {
+ hash_state.hash_representation_.emplace_back(
+ reinterpret_cast<const char*>(begin), size);
+ }
return hash_state;
}
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 8e8371b3..ed2ab04f 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -918,6 +918,7 @@ cc_test(
"//absl/container:fixed_array",
"//absl/functional:function_ref",
"//absl/hash",
+ "//absl/hash:hash_testing",
"//absl/log",
"//absl/log:check",
"//absl/random",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index 1b245362..ce9c5e46 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -1072,6 +1072,7 @@ absl_cc_test(
absl::fixed_array
absl::function_ref
absl::hash
+ absl::hash_testing
absl::log
absl::optional
absl::random_random
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index f1a5f39c..69b1a69e 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -42,6 +42,7 @@
#include "absl/container/fixed_array.h"
#include "absl/functional/function_ref.h"
#include "absl/hash/hash.h"
+#include "absl/hash/hash_testing.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/random/random.h"
@@ -2013,6 +2014,26 @@ TEST(CordTest, CordMemoryUsageBTree) {
rep2_size);
}
+TEST(CordTest, TestHashFragmentation) {
+ // Make sure we hit these boundary cases precisely.
+ EXPECT_EQ(1024, absl::hash_internal::PiecewiseChunkSize());
+ EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly({
+ absl::Cord(),
+ absl::MakeFragmentedCord({std::string(600, 'a'), std::string(600, 'a')}),
+ absl::MakeFragmentedCord({std::string(1200, 'a')}),
+ absl::MakeFragmentedCord({std::string(900, 'b'), std::string(900, 'b')}),
+ absl::MakeFragmentedCord({std::string(1800, 'b')}),
+ absl::MakeFragmentedCord(
+ {std::string(2000, 'c'), std::string(2000, 'c')}),
+ absl::MakeFragmentedCord({std::string(4000, 'c')}),
+ absl::MakeFragmentedCord({std::string(1024, 'd')}),
+ absl::MakeFragmentedCord({std::string(1023, 'd'), "d"}),
+ absl::MakeFragmentedCord({std::string(1025, 'e')}),
+ absl::MakeFragmentedCord({std::string(1024, 'e'), "e"}),
+ absl::MakeFragmentedCord({std::string(1023, 'e'), "e", "e"}),
+ }));
+}
+
// Regtest for a change that had to be rolled back because it expanded out
// of the InlineRep too soon, which was observable through MemoryUsage().
TEST_P(CordTest, CordMemoryUsageInlineRep) {