aboutsummaryrefslogtreecommitdiff
path: root/absl/container/internal/raw_hash_set_test.cc
diff options
context:
space:
mode:
authorEvan Brown <ezb@google.com>2024-03-19 12:07:34 -0700
committerCopybara-Service <copybara-worker@google.com>2024-03-19 12:08:27 -0700
commit1980d7b98a0cae64432ccf9c4afebda5e21b2c5e (patch)
tree89fa98715f94d4f0221080158e1c798dc5cf52c0 /absl/container/internal/raw_hash_set_test.cc
parent43c36ffae6256b4c93c1b9d3c73efaf8c2c891ab (diff)
downloadabseil-1980d7b98a0cae64432ccf9c4afebda5e21b2c5e.tar.gz
abseil-1980d7b98a0cae64432ccf9c4afebda5e21b2c5e.tar.bz2
abseil-1980d7b98a0cae64432ccf9c4afebda5e21b2c5e.zip
Do hashtablez sampling on the first insertion into an empty SOO hashtable.
When sampling triggers, we skip SOO and allocate a backing array. We must do this because the HashtablezInfoHandle is part of the heap allocation (along with the control bytes and slots). By default, we sample 1 in ~1024 hashtables when sampling is enabled. This will impact performance because (a) we won't benefit from SOO so we would have worse data locality (more cache/TLB misses), and (b) the backing array capacity will be 3 instead of 1 so (b.1) we skip the rehash after the second insertion and (b.2) we potentially waste up to two slots worth of memory. We also add an soo_capacity field to HashtablezInfo to allow for distinguishing which sampled tables may otherwise have been SOO - this will allow us to know approximately what fraction of tables are in SOO mode. PiperOrigin-RevId: 617252334 Change-Id: Ib48b7a4870bd74ea3ba923ed8f350a3b75dbb7d3
Diffstat (limited to 'absl/container/internal/raw_hash_set_test.cc')
-rw-r--r--absl/container/internal/raw_hash_set_test.cc184
1 files changed, 163 insertions, 21 deletions
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 66baeb64..f00cef8c 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -52,7 +52,9 @@
#include "absl/container/internal/hashtable_debug.h"
#include "absl/container/internal/hashtablez_sampler.h"
#include "absl/container/internal/test_allocator.h"
+#include "absl/functional/function_ref.h"
#include "absl/hash/hash.h"
+#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/meta/type_traits.h"
@@ -2035,6 +2037,9 @@ TYPED_TEST_P(SooTest, FindFullDeletedRegression) {
}
TYPED_TEST_P(SooTest, ReplacingDeletedSlotDoesNotRehash) {
+ // We need to disable hashtablez to avoid issues related to SOO and sampling.
+ SetHashtablezEnabled(false);
+
size_t n;
{
// Compute n such that n is the maximum number of elements before rehash.
@@ -2388,6 +2393,9 @@ TYPED_TEST_P(SooTest, IterationOrderChangesOnRehash) {
// Verify that pointers are invalidated as soon as a second element is inserted.
// This prevents dependency on pointer stability on small tables.
TYPED_TEST_P(SooTest, UnstablePointers) {
+ // We need to disable hashtablez to avoid issues related to SOO and sampling.
+ SetHashtablezEnabled(false);
+
TypeParam table;
const auto addr = [&](int i) {
@@ -2523,7 +2531,7 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) {
constexpr bool soo_enabled = std::is_same<SooIntTable, TypeParam>::value;
// Enable the feature even if the prod default is off.
SetHashtablezEnabled(true);
- SetHashtablezSampleParameter(100);
+ SetHashtablezSampleParameter(100); // Sample ~1% of tables.
auto& sampler = GlobalHashtablezSampler();
size_t start_size = 0;
@@ -2557,25 +2565,26 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) {
absl::flat_hash_map<size_t, int> observed_checksums;
absl::flat_hash_map<ssize_t, int> reservations;
end_size += sampler.Iterate([&](const HashtablezInfo& info) {
- if (preexisting_info.count(&info) == 0) {
- observed_checksums[info.hashes_bitwise_xor.load(
- std::memory_order_relaxed)]++;
- reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
- }
- EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
++end_size;
+ if (preexisting_info.contains(&info)) return;
+ observed_checksums[info.hashes_bitwise_xor.load(
+ std::memory_order_relaxed)]++;
+ reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
+ EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
+
+ if (soo_enabled) {
+ EXPECT_EQ(info.soo_capacity, SooCapacity());
+ } else {
+ EXPECT_EQ(info.soo_capacity, 0);
+ }
});
+ // Expect that we sampled at the requested sampling rate of ~1%.
EXPECT_NEAR((end_size - start_size) / static_cast<double>(tables.size()),
0.01, 0.005);
- if (soo_enabled) {
- EXPECT_EQ(observed_checksums.size(), 9);
- } else {
- EXPECT_EQ(observed_checksums.size(), 5);
- for (const auto& [_, count] : observed_checksums) {
- EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.2,
- 0.05);
- }
+ EXPECT_EQ(observed_checksums.size(), 5);
+ for (const auto& [_, count] : observed_checksums) {
+ EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.2, 0.05);
}
EXPECT_EQ(reservations.size(), 10);
@@ -2591,12 +2600,141 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) {
REGISTER_TYPED_TEST_SUITE_P(RawHashSamplerTest, Sample);
using RawHashSamplerTestTypes = ::testing::Types<SooIntTable, NonSooIntTable>;
INSTANTIATE_TYPED_TEST_SUITE_P(My, RawHashSamplerTest, RawHashSamplerTestTypes);
+
+std::vector<const HashtablezInfo*> SampleSooMutation(
+ absl::FunctionRef<void(SooIntTable&)> mutate_table) {
+ // Enable the feature even if the prod default is off.
+ SetHashtablezEnabled(true);
+ SetHashtablezSampleParameter(100); // Sample ~1% of tables.
+
+ auto& sampler = GlobalHashtablezSampler();
+ size_t start_size = 0;
+ absl::flat_hash_set<const HashtablezInfo*> preexisting_info;
+ start_size += sampler.Iterate([&](const HashtablezInfo& info) {
+ preexisting_info.insert(&info);
+ ++start_size;
+ });
+
+ std::vector<SooIntTable> tables;
+ for (int i = 0; i < 1000000; ++i) {
+ tables.emplace_back();
+ mutate_table(tables.back());
+ }
+ size_t end_size = 0;
+ std::vector<const HashtablezInfo*> infos;
+ end_size += sampler.Iterate([&](const HashtablezInfo& info) {
+ ++end_size;
+ if (preexisting_info.contains(&info)) return;
+ infos.push_back(&info);
+ });
+
+ // Expect that we sampled at the requested sampling rate of ~1%.
+ EXPECT_NEAR((end_size - start_size) / static_cast<double>(tables.size()),
+ 0.01, 0.005);
+ return infos;
+}
+
+TEST(RawHashSamplerTest, SooTableInsertToEmpty) {
+ if (SooIntTable().capacity() != SooCapacity()) {
+ CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+ GTEST_SKIP() << "not SOO on this platform";
+ }
+ std::vector<const HashtablezInfo*> infos =
+ SampleSooMutation([](SooIntTable& t) { t.insert(1); });
+
+ for (const HashtablezInfo* info : infos) {
+ ASSERT_EQ(info->inline_element_size,
+ sizeof(typename SooIntTable::value_type));
+ ASSERT_EQ(info->soo_capacity, SooCapacity());
+ ASSERT_EQ(info->capacity, NextCapacity(SooCapacity()));
+ ASSERT_EQ(info->size, 1);
+ ASSERT_EQ(info->max_reserve, 0);
+ ASSERT_EQ(info->num_erases, 0);
+ ASSERT_EQ(info->max_probe_length, 0);
+ ASSERT_EQ(info->total_probe_length, 0);
+ }
+}
+
+TEST(RawHashSamplerTest, SooTableReserveToEmpty) {
+ if (SooIntTable().capacity() != SooCapacity()) {
+ CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+ GTEST_SKIP() << "not SOO on this platform";
+ }
+ std::vector<const HashtablezInfo*> infos =
+ SampleSooMutation([](SooIntTable& t) { t.reserve(100); });
+
+ for (const HashtablezInfo* info : infos) {
+ ASSERT_EQ(info->inline_element_size,
+ sizeof(typename SooIntTable::value_type));
+ ASSERT_EQ(info->soo_capacity, SooCapacity());
+ ASSERT_GE(info->capacity, 100);
+ ASSERT_EQ(info->size, 0);
+ ASSERT_EQ(info->max_reserve, 100);
+ ASSERT_EQ(info->num_erases, 0);
+ ASSERT_EQ(info->max_probe_length, 0);
+ ASSERT_EQ(info->total_probe_length, 0);
+ }
+}
+
+// This tests that reserve on a full SOO table doesn't incorrectly result in new
+// (over-)sampling.
+TEST(RawHashSamplerTest, SooTableReserveToFullSoo) {
+ if (SooIntTable().capacity() != SooCapacity()) {
+ CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+ GTEST_SKIP() << "not SOO on this platform";
+ }
+ std::vector<const HashtablezInfo*> infos =
+ SampleSooMutation([](SooIntTable& t) {
+ t.insert(1);
+ t.reserve(100);
+ });
+
+ for (const HashtablezInfo* info : infos) {
+ ASSERT_EQ(info->inline_element_size,
+ sizeof(typename SooIntTable::value_type));
+ ASSERT_EQ(info->soo_capacity, SooCapacity());
+ ASSERT_GE(info->capacity, 100);
+ ASSERT_EQ(info->size, 1);
+ ASSERT_EQ(info->max_reserve, 100);
+ ASSERT_EQ(info->num_erases, 0);
+ ASSERT_EQ(info->max_probe_length, 0);
+ ASSERT_EQ(info->total_probe_length, 0);
+ }
+}
+
+// This tests that rehash(0) on a sampled table with size that fits in SOO
+// doesn't incorrectly result in losing sampling.
+TEST(RawHashSamplerTest, SooTableRehashShrinkWhenSizeFitsInSoo) {
+ if (SooIntTable().capacity() != SooCapacity()) {
+ CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+ GTEST_SKIP() << "not SOO on this platform";
+ }
+ std::vector<const HashtablezInfo*> infos =
+ SampleSooMutation([](SooIntTable& t) {
+ t.reserve(100);
+ t.insert(1);
+ EXPECT_GE(t.capacity(), 100);
+ t.rehash(0);
+ });
+
+ for (const HashtablezInfo* info : infos) {
+ ASSERT_EQ(info->inline_element_size,
+ sizeof(typename SooIntTable::value_type));
+ ASSERT_EQ(info->soo_capacity, SooCapacity());
+ ASSERT_EQ(info->capacity, NextCapacity(SooCapacity()));
+ ASSERT_EQ(info->size, 1);
+ ASSERT_EQ(info->max_reserve, 100);
+ ASSERT_EQ(info->num_erases, 0);
+ ASSERT_EQ(info->max_probe_length, 0);
+ ASSERT_EQ(info->total_probe_length, 0);
+ }
+}
#endif // ABSL_INTERNAL_HASHTABLEZ_SAMPLE
TEST(RawHashSamplerTest, DoNotSampleCustomAllocators) {
// Enable the feature even if the prod default is off.
SetHashtablezEnabled(true);
- SetHashtablezSampleParameter(100);
+ SetHashtablezSampleParameter(100); // Sample ~1% of tables.
auto& sampler = GlobalHashtablezSampler();
size_t start_size = 0;
@@ -2974,7 +3112,7 @@ TYPED_TEST_P(SooTable, Basic) {
bool frozen = true;
TypeParam t{FreezableAlloc<typename TypeParam::value_type>(&frozen)};
if (t.capacity() != SooCapacity()) {
- EXPECT_LT(sizeof(void*), 8);
+ CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
GTEST_SKIP() << "not SOO on this platform";
}
@@ -3006,14 +3144,18 @@ using FreezableSooTableTypes =
FreezableSizedValueSooTable<16>>;
INSTANTIATE_TYPED_TEST_SUITE_P(My, SooTable, FreezableSooTableTypes);
-TEST(Table, RehashToSoo) {
+TEST(Table, RehashToSooUnsampled) {
SooIntTable t;
if (t.capacity() != SooCapacity()) {
- EXPECT_LT(sizeof(void*), 8);
+ CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
GTEST_SKIP() << "not SOO on this platform";
}
- t.reserve(10);
+ // We disable hashtablez sampling for this test to ensure that the table isn't
+ // sampled. When the table is sampled, it won't rehash down to SOO.
+ SetHashtablezEnabled(false);
+
+ t.reserve(100);
t.insert(0);
EXPECT_EQ(*t.begin(), 0);
@@ -3026,7 +3168,7 @@ TEST(Table, RehashToSoo) {
EXPECT_EQ(t.find(1), t.end());
}
-TEST(Table, ResizeToNonSoo) {
+TEST(Table, ReserveToNonSoo) {
for (int reserve_capacity : {8, 100000}) {
SooIntTable t;
t.insert(0);