diff options
author | Evan Brown <ezb@google.com> | 2024-03-19 12:07:34 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-03-19 12:08:27 -0700 |
commit | 1980d7b98a0cae64432ccf9c4afebda5e21b2c5e (patch) | |
tree | 89fa98715f94d4f0221080158e1c798dc5cf52c0 /absl/container/internal/raw_hash_set_test.cc | |
parent | 43c36ffae6256b4c93c1b9d3c73efaf8c2c891ab (diff) | |
download | abseil-1980d7b98a0cae64432ccf9c4afebda5e21b2c5e.tar.gz abseil-1980d7b98a0cae64432ccf9c4afebda5e21b2c5e.tar.bz2 abseil-1980d7b98a0cae64432ccf9c4afebda5e21b2c5e.zip |
Do hashtablez sampling on the first insertion into an empty SOO hashtable.
When sampling triggers, we skip SOO and allocate a backing array. We must do this because the HashtablezInfoHandle is part of the heap allocation (along with the control bytes and slots). By default, we sample 1 in ~1024 hashtables when sampling is enabled. This will impact performance because (a) we won't benefit from SOO so we would have worse data locality (more cache/TLB misses), and (b) the backing array capacity will be 3 instead of 1 so (b.1) we skip the rehash after the second insertion and (b.2) we potentially waste up to two slots worth of memory.
We also add an soo_capacity field to HashtablezInfo to allow for distinguishing which sampled tables may otherwise have been SOO - this will allow us to know approximately what fraction of tables are in SOO mode.
PiperOrigin-RevId: 617252334
Change-Id: Ib48b7a4870bd74ea3ba923ed8f350a3b75dbb7d3
Diffstat (limited to 'absl/container/internal/raw_hash_set_test.cc')
-rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 184 |
1 files changed, 163 insertions, 21 deletions
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 66baeb64..f00cef8c 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -52,7 +52,9 @@ #include "absl/container/internal/hashtable_debug.h" #include "absl/container/internal/hashtablez_sampler.h" #include "absl/container/internal/test_allocator.h" +#include "absl/functional/function_ref.h" #include "absl/hash/hash.h" +#include "absl/log/check.h" #include "absl/log/log.h" #include "absl/memory/memory.h" #include "absl/meta/type_traits.h" @@ -2035,6 +2037,9 @@ TYPED_TEST_P(SooTest, FindFullDeletedRegression) { } TYPED_TEST_P(SooTest, ReplacingDeletedSlotDoesNotRehash) { + // We need to disable hashtablez to avoid issues related to SOO and sampling. + SetHashtablezEnabled(false); + size_t n; { // Compute n such that n is the maximum number of elements before rehash. @@ -2388,6 +2393,9 @@ TYPED_TEST_P(SooTest, IterationOrderChangesOnRehash) { // Verify that pointers are invalidated as soon as a second element is inserted. // This prevents dependency on pointer stability on small tables. TYPED_TEST_P(SooTest, UnstablePointers) { + // We need to disable hashtablez to avoid issues related to SOO and sampling. + SetHashtablezEnabled(false); + TypeParam table; const auto addr = [&](int i) { @@ -2523,7 +2531,7 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) { constexpr bool soo_enabled = std::is_same<SooIntTable, TypeParam>::value; // Enable the feature even if the prod default is off. SetHashtablezEnabled(true); - SetHashtablezSampleParameter(100); + SetHashtablezSampleParameter(100); // Sample ~1% of tables. auto& sampler = GlobalHashtablezSampler(); size_t start_size = 0; @@ -2557,25 +2565,26 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) { absl::flat_hash_map<size_t, int> observed_checksums; absl::flat_hash_map<ssize_t, int> reservations; end_size += sampler.Iterate([&](const HashtablezInfo& info) { - if (preexisting_info.count(&info) == 0) { - observed_checksums[info.hashes_bitwise_xor.load( - std::memory_order_relaxed)]++; - reservations[info.max_reserve.load(std::memory_order_relaxed)]++; - } - EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type)); ++end_size; + if (preexisting_info.contains(&info)) return; + observed_checksums[info.hashes_bitwise_xor.load( + std::memory_order_relaxed)]++; + reservations[info.max_reserve.load(std::memory_order_relaxed)]++; + EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type)); + + if (soo_enabled) { + EXPECT_EQ(info.soo_capacity, SooCapacity()); + } else { + EXPECT_EQ(info.soo_capacity, 0); + } }); + // Expect that we sampled at the requested sampling rate of ~1%. EXPECT_NEAR((end_size - start_size) / static_cast<double>(tables.size()), 0.01, 0.005); - if (soo_enabled) { - EXPECT_EQ(observed_checksums.size(), 9); - } else { - EXPECT_EQ(observed_checksums.size(), 5); - for (const auto& [_, count] : observed_checksums) { - EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.2, - 0.05); - } + EXPECT_EQ(observed_checksums.size(), 5); + for (const auto& [_, count] : observed_checksums) { + EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.2, 0.05); } EXPECT_EQ(reservations.size(), 10); @@ -2591,12 +2600,141 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) { REGISTER_TYPED_TEST_SUITE_P(RawHashSamplerTest, Sample); using RawHashSamplerTestTypes = ::testing::Types<SooIntTable, NonSooIntTable>; INSTANTIATE_TYPED_TEST_SUITE_P(My, RawHashSamplerTest, RawHashSamplerTestTypes); + +std::vector<const HashtablezInfo*> SampleSooMutation( + absl::FunctionRef<void(SooIntTable&)> mutate_table) { + // Enable the feature even if the prod default is off. + SetHashtablezEnabled(true); + SetHashtablezSampleParameter(100); // Sample ~1% of tables. + + auto& sampler = GlobalHashtablezSampler(); + size_t start_size = 0; + absl::flat_hash_set<const HashtablezInfo*> preexisting_info; + start_size += sampler.Iterate([&](const HashtablezInfo& info) { + preexisting_info.insert(&info); + ++start_size; + }); + + std::vector<SooIntTable> tables; + for (int i = 0; i < 1000000; ++i) { + tables.emplace_back(); + mutate_table(tables.back()); + } + size_t end_size = 0; + std::vector<const HashtablezInfo*> infos; + end_size += sampler.Iterate([&](const HashtablezInfo& info) { + ++end_size; + if (preexisting_info.contains(&info)) return; + infos.push_back(&info); + }); + + // Expect that we sampled at the requested sampling rate of ~1%. + EXPECT_NEAR((end_size - start_size) / static_cast<double>(tables.size()), + 0.01, 0.005); + return infos; +} + +TEST(RawHashSamplerTest, SooTableInsertToEmpty) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector<const HashtablezInfo*> infos = + SampleSooMutation([](SooIntTable& t) { t.insert(1); }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_EQ(info->capacity, NextCapacity(SooCapacity())); + ASSERT_EQ(info->size, 1); + ASSERT_EQ(info->max_reserve, 0); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} + +TEST(RawHashSamplerTest, SooTableReserveToEmpty) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector<const HashtablezInfo*> infos = + SampleSooMutation([](SooIntTable& t) { t.reserve(100); }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_GE(info->capacity, 100); + ASSERT_EQ(info->size, 0); + ASSERT_EQ(info->max_reserve, 100); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} + +// This tests that reserve on a full SOO table doesn't incorrectly result in new +// (over-)sampling. +TEST(RawHashSamplerTest, SooTableReserveToFullSoo) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector<const HashtablezInfo*> infos = + SampleSooMutation([](SooIntTable& t) { + t.insert(1); + t.reserve(100); + }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_GE(info->capacity, 100); + ASSERT_EQ(info->size, 1); + ASSERT_EQ(info->max_reserve, 100); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} + +// This tests that rehash(0) on a sampled table with size that fits in SOO +// doesn't incorrectly result in losing sampling. +TEST(RawHashSamplerTest, SooTableRehashShrinkWhenSizeFitsInSoo) { + if (SooIntTable().capacity() != SooCapacity()) { + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; + GTEST_SKIP() << "not SOO on this platform"; + } + std::vector<const HashtablezInfo*> infos = + SampleSooMutation([](SooIntTable& t) { + t.reserve(100); + t.insert(1); + EXPECT_GE(t.capacity(), 100); + t.rehash(0); + }); + + for (const HashtablezInfo* info : infos) { + ASSERT_EQ(info->inline_element_size, + sizeof(typename SooIntTable::value_type)); + ASSERT_EQ(info->soo_capacity, SooCapacity()); + ASSERT_EQ(info->capacity, NextCapacity(SooCapacity())); + ASSERT_EQ(info->size, 1); + ASSERT_EQ(info->max_reserve, 100); + ASSERT_EQ(info->num_erases, 0); + ASSERT_EQ(info->max_probe_length, 0); + ASSERT_EQ(info->total_probe_length, 0); + } +} #endif // ABSL_INTERNAL_HASHTABLEZ_SAMPLE TEST(RawHashSamplerTest, DoNotSampleCustomAllocators) { // Enable the feature even if the prod default is off. SetHashtablezEnabled(true); - SetHashtablezSampleParameter(100); + SetHashtablezSampleParameter(100); // Sample ~1% of tables. auto& sampler = GlobalHashtablezSampler(); size_t start_size = 0; @@ -2974,7 +3112,7 @@ TYPED_TEST_P(SooTable, Basic) { bool frozen = true; TypeParam t{FreezableAlloc<typename TypeParam::value_type>(&frozen)}; if (t.capacity() != SooCapacity()) { - EXPECT_LT(sizeof(void*), 8); + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; GTEST_SKIP() << "not SOO on this platform"; } @@ -3006,14 +3144,18 @@ using FreezableSooTableTypes = FreezableSizedValueSooTable<16>>; INSTANTIATE_TYPED_TEST_SUITE_P(My, SooTable, FreezableSooTableTypes); -TEST(Table, RehashToSoo) { +TEST(Table, RehashToSooUnsampled) { SooIntTable t; if (t.capacity() != SooCapacity()) { - EXPECT_LT(sizeof(void*), 8); + CHECK_LT(sizeof(void*), 8) << "missing SOO coverage"; GTEST_SKIP() << "not SOO on this platform"; } - t.reserve(10); + // We disable hashtablez sampling for this test to ensure that the table isn't + // sampled. When the table is sampled, it won't rehash down to SOO. + SetHashtablezEnabled(false); + + t.reserve(100); t.insert(0); EXPECT_EQ(*t.begin(), 0); @@ -3026,7 +3168,7 @@ TEST(Table, RehashToSoo) { EXPECT_EQ(t.find(1), t.end()); } -TEST(Table, ResizeToNonSoo) { +TEST(Table, ReserveToNonSoo) { for (int reserve_capacity : {8, 100000}) { SooIntTable t; t.insert(0); |