From 1980d7b98a0cae64432ccf9c4afebda5e21b2c5e Mon Sep 17 00:00:00 2001
From: Evan Brown
Date: Tue, 19 Mar 2024 12:07:34 -0700
Subject: Do hashtablez sampling on the first insertion into an empty SOO
 hashtable.

When sampling triggers, we skip SOO and allocate a backing array. We must do
this because the HashtablezInfoHandle is part of the heap allocation (along
with the control bytes and slots).

By default, we sample 1 in ~1024 hashtables when sampling is enabled. This
will impact performance because (a) we won't benefit from SOO, so we would
have worse data locality (more cache/TLB misses), and (b) the backing array
capacity will be 3 instead of 1, so (b.1) we skip the rehash after the second
insertion and (b.2) we potentially waste up to two slots' worth of memory.

We also add an soo_capacity field to HashtablezInfo to allow for
distinguishing which sampled tables may otherwise have been SOO - this will
allow us to know approximately what fraction of tables are in SOO mode.

PiperOrigin-RevId: 617252334
Change-Id: Ib48b7a4870bd74ea3ba923ed8f350a3b75dbb7d3
---
 absl/container/internal/raw_hash_set_test.cc | 184 ++++++++++++++++++++++++---
 1 file changed, 163 insertions(+), 21 deletions(-)

diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 66baeb64..f00cef8c 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -52,7 +52,9 @@
 #include "absl/container/internal/hashtable_debug.h"
 #include "absl/container/internal/hashtablez_sampler.h"
 #include "absl/container/internal/test_allocator.h"
+#include "absl/functional/function_ref.h"
 #include "absl/hash/hash.h"
+#include "absl/log/check.h"
 #include "absl/log/log.h"
 #include "absl/memory/memory.h"
 #include "absl/meta/type_traits.h"
@@ -2035,6 +2037,9 @@ TYPED_TEST_P(SooTest, FindFullDeletedRegression) {
 }
 
 TYPED_TEST_P(SooTest, ReplacingDeletedSlotDoesNotRehash) {
+  // We need to disable hashtablez to avoid issues related to SOO and sampling.
+  SetHashtablezEnabled(false);
+
   size_t n;
   {
     // Compute n such that n is the maximum number of elements before rehash.
@@ -2388,6 +2393,9 @@ TYPED_TEST_P(SooTest, IterationOrderChangesOnRehash) {
 // Verify that pointers are invalidated as soon as a second element is inserted.
 // This prevents dependency on pointer stability on small tables.
 TYPED_TEST_P(SooTest, UnstablePointers) {
+  // We need to disable hashtablez to avoid issues related to SOO and sampling.
+  SetHashtablezEnabled(false);
+
   TypeParam table;
 
   const auto addr = [&](int i) {
@@ -2523,7 +2531,7 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) {
   constexpr bool soo_enabled = std::is_same<TypeParam, SooIntTable>::value;
   // Enable the feature even if the prod default is off.
   SetHashtablezEnabled(true);
-  SetHashtablezSampleParameter(100);
+  SetHashtablezSampleParameter(100);  // Sample ~1% of tables.
 
   auto& sampler = GlobalHashtablezSampler();
   size_t start_size = 0;
@@ -2557,25 +2565,26 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) {
   absl::flat_hash_map<size_t, int> observed_checksums;
   absl::flat_hash_map<size_t, int> reservations;
   end_size += sampler.Iterate([&](const HashtablezInfo& info) {
-    if (preexisting_info.count(&info) == 0) {
-      observed_checksums[info.hashes_bitwise_xor.load(
-          std::memory_order_relaxed)]++;
-      reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
-    }
-    EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
     ++end_size;
+    if (preexisting_info.contains(&info)) return;
+    observed_checksums[info.hashes_bitwise_xor.load(
+        std::memory_order_relaxed)]++;
+    reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
+    EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
+
+    if (soo_enabled) {
+      EXPECT_EQ(info.soo_capacity, SooCapacity());
+    } else {
+      EXPECT_EQ(info.soo_capacity, 0);
+    }
   });
 
+  // Expect that we sampled at the requested sampling rate of ~1%.
   EXPECT_NEAR((end_size - start_size) / static_cast<double>(tables.size()),
               0.01, 0.005);
-  if (soo_enabled) {
-    EXPECT_EQ(observed_checksums.size(), 9);
-  } else {
-    EXPECT_EQ(observed_checksums.size(), 5);
-    for (const auto& [_, count] : observed_checksums) {
-      EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.2,
-                  0.05);
-    }
+  EXPECT_EQ(observed_checksums.size(), 5);
+  for (const auto& [_, count] : observed_checksums) {
+    EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.2, 0.05);
   }
 
   EXPECT_EQ(reservations.size(), 10);
@@ -2591,12 +2600,141 @@ TYPED_TEST_P(RawHashSamplerTest, Sample) {
 REGISTER_TYPED_TEST_SUITE_P(RawHashSamplerTest, Sample);
 using RawHashSamplerTestTypes = ::testing::Types<SooIntTable, NonSooIntTable>;
 INSTANTIATE_TYPED_TEST_SUITE_P(My, RawHashSamplerTest, RawHashSamplerTestTypes);
+
+std::vector<const HashtablezInfo*> SampleSooMutation(
+    absl::FunctionRef<void(SooIntTable&)> mutate_table) {
+  // Enable the feature even if the prod default is off.
+  SetHashtablezEnabled(true);
+  SetHashtablezSampleParameter(100);  // Sample ~1% of tables.
+
+  auto& sampler = GlobalHashtablezSampler();
+  size_t start_size = 0;
+  absl::flat_hash_set<const HashtablezInfo*> preexisting_info;
+  start_size += sampler.Iterate([&](const HashtablezInfo& info) {
+    preexisting_info.insert(&info);
+    ++start_size;
+  });
+
+  std::vector<SooIntTable> tables;
+  for (int i = 0; i < 1000000; ++i) {
+    tables.emplace_back();
+    mutate_table(tables.back());
+  }
+  size_t end_size = 0;
+  std::vector<const HashtablezInfo*> infos;
+  end_size += sampler.Iterate([&](const HashtablezInfo& info) {
+    ++end_size;
+    if (preexisting_info.contains(&info)) return;
+    infos.push_back(&info);
+  });
+
+  // Expect that we sampled at the requested sampling rate of ~1%.
+  EXPECT_NEAR((end_size - start_size) / static_cast<double>(tables.size()),
+              0.01, 0.005);
+  return infos;
+}
+
+TEST(RawHashSamplerTest, SooTableInsertToEmpty) {
+  if (SooIntTable().capacity() != SooCapacity()) {
+    CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+    GTEST_SKIP() << "not SOO on this platform";
+  }
+  std::vector<const HashtablezInfo*> infos =
+      SampleSooMutation([](SooIntTable& t) { t.insert(1); });
+
+  for (const HashtablezInfo* info : infos) {
+    ASSERT_EQ(info->inline_element_size,
+              sizeof(typename SooIntTable::value_type));
+    ASSERT_EQ(info->soo_capacity, SooCapacity());
+    ASSERT_EQ(info->capacity, NextCapacity(SooCapacity()));
+    ASSERT_EQ(info->size, 1);
+    ASSERT_EQ(info->max_reserve, 0);
+    ASSERT_EQ(info->num_erases, 0);
+    ASSERT_EQ(info->max_probe_length, 0);
+    ASSERT_EQ(info->total_probe_length, 0);
+  }
+}
+
+TEST(RawHashSamplerTest, SooTableReserveToEmpty) {
+  if (SooIntTable().capacity() != SooCapacity()) {
+    CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+    GTEST_SKIP() << "not SOO on this platform";
+  }
+  std::vector<const HashtablezInfo*> infos =
+      SampleSooMutation([](SooIntTable& t) { t.reserve(100); });
+
+  for (const HashtablezInfo* info : infos) {
+    ASSERT_EQ(info->inline_element_size,
+              sizeof(typename SooIntTable::value_type));
+    ASSERT_EQ(info->soo_capacity, SooCapacity());
+    ASSERT_GE(info->capacity, 100);
+    ASSERT_EQ(info->size, 0);
+    ASSERT_EQ(info->max_reserve, 100);
+    ASSERT_EQ(info->num_erases, 0);
+    ASSERT_EQ(info->max_probe_length, 0);
+    ASSERT_EQ(info->total_probe_length, 0);
+  }
+}
+
+// This tests that reserve on a full SOO table doesn't incorrectly result in new
+// (over-)sampling.
+TEST(RawHashSamplerTest, SooTableReserveToFullSoo) {
+  if (SooIntTable().capacity() != SooCapacity()) {
+    CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+    GTEST_SKIP() << "not SOO on this platform";
+  }
+  std::vector<const HashtablezInfo*> infos =
+      SampleSooMutation([](SooIntTable& t) {
+        t.insert(1);
+        t.reserve(100);
+      });
+
+  for (const HashtablezInfo* info : infos) {
+    ASSERT_EQ(info->inline_element_size,
+              sizeof(typename SooIntTable::value_type));
+    ASSERT_EQ(info->soo_capacity, SooCapacity());
+    ASSERT_GE(info->capacity, 100);
+    ASSERT_EQ(info->size, 1);
+    ASSERT_EQ(info->max_reserve, 100);
+    ASSERT_EQ(info->num_erases, 0);
+    ASSERT_EQ(info->max_probe_length, 0);
+    ASSERT_EQ(info->total_probe_length, 0);
+  }
+}
+
+// This tests that rehash(0) on a sampled table with size that fits in SOO
+// doesn't incorrectly result in losing sampling.
+TEST(RawHashSamplerTest, SooTableRehashShrinkWhenSizeFitsInSoo) {
+  if (SooIntTable().capacity() != SooCapacity()) {
+    CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
+    GTEST_SKIP() << "not SOO on this platform";
+  }
+  std::vector<const HashtablezInfo*> infos =
+      SampleSooMutation([](SooIntTable& t) {
+        t.reserve(100);
+        t.insert(1);
+        EXPECT_GE(t.capacity(), 100);
+        t.rehash(0);
+      });
+
+  for (const HashtablezInfo* info : infos) {
+    ASSERT_EQ(info->inline_element_size,
+              sizeof(typename SooIntTable::value_type));
+    ASSERT_EQ(info->soo_capacity, SooCapacity());
+    ASSERT_EQ(info->capacity, NextCapacity(SooCapacity()));
+    ASSERT_EQ(info->size, 1);
+    ASSERT_EQ(info->max_reserve, 100);
+    ASSERT_EQ(info->num_erases, 0);
+    ASSERT_EQ(info->max_probe_length, 0);
+    ASSERT_EQ(info->total_probe_length, 0);
+  }
+}
 #endif  // ABSL_INTERNAL_HASHTABLEZ_SAMPLE
 
 TEST(RawHashSamplerTest, DoNotSampleCustomAllocators) {
   // Enable the feature even if the prod default is off.
   SetHashtablezEnabled(true);
-  SetHashtablezSampleParameter(100);
+  SetHashtablezSampleParameter(100);  // Sample ~1% of tables.
 
   auto& sampler = GlobalHashtablezSampler();
   size_t start_size = 0;
@@ -2974,7 +3112,7 @@ TYPED_TEST_P(SooTable, Basic) {
   bool frozen = true;
   TypeParam t{FreezableAlloc<typename TypeParam::value_type>(&frozen)};
   if (t.capacity() != SooCapacity()) {
-    EXPECT_LT(sizeof(void*), 8);
+    CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
     GTEST_SKIP() << "not SOO on this platform";
   }
 
@@ -3006,14 +3144,18 @@ using FreezableSooTableTypes =
                      FreezableSizedValueSooTable<16>>;
 INSTANTIATE_TYPED_TEST_SUITE_P(My, SooTable, FreezableSooTableTypes);
 
-TEST(Table, RehashToSoo) {
+TEST(Table, RehashToSooUnsampled) {
   SooIntTable t;
   if (t.capacity() != SooCapacity()) {
-    EXPECT_LT(sizeof(void*), 8);
+    CHECK_LT(sizeof(void*), 8) << "missing SOO coverage";
     GTEST_SKIP() << "not SOO on this platform";
   }
 
-  t.reserve(10);
+  // We disable hashtablez sampling for this test to ensure that the table isn't
+  // sampled. When the table is sampled, it won't rehash down to SOO.
+  SetHashtablezEnabled(false);
+
+  t.reserve(100);
   t.insert(0);
 
   EXPECT_EQ(*t.begin(), 0);
@@ -3026,7 +3168,7 @@ TEST(Table, RehashToSoo) {
   EXPECT_EQ(t.find(1), t.end());
 }
 
-TEST(Table, ResizeToNonSoo) {
+TEST(Table, ReserveToNonSoo) {
   for (int reserve_capacity : {8, 100000}) {
     SooIntTable t;
     t.insert(0);
-- 
cgit v1.2.3
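
The following is a minimal, self-contained C++ sketch of the behavior described in the commit message, not Abseil's actual raw_hash_set internals: on the first insertion into an empty table that would otherwise stay inline (SOO), a sampled table skips SOO and allocates a small backing array, because the sampling info handle lives in that heap allocation, and it records soo_capacity so analysis can estimate what fraction of tables would have been SOO. All names here (ToySooTable, ToySamplingInfo, ShouldSampleOneIn, kSooCapacity, kSampledCapacity) are hypothetical stand-ins.

// Minimal sketch, not Abseil's raw_hash_set internals. All names below are
// hypothetical stand-ins used only for illustration.
#include <cstddef>
#include <cstdlib>
#include <optional>
#include <vector>

constexpr std::size_t kSooCapacity = 1;      // one inline (SOO) slot
constexpr std::size_t kSampledCapacity = 3;  // next capacity after SOO

// Stand-in for the global "1 in ~1024 tables" sampling decision.
bool ShouldSampleOneIn(int n) { return std::rand() % n == 0; }

// Stand-in for HashtablezInfo: soo_capacity records whether this sampled
// table would have been SOO, so analysis can estimate the SOO fraction.
struct ToySamplingInfo {
  std::size_t soo_capacity = 0;
  std::size_t capacity = 0;
  std::size_t size = 0;
};

class ToySooTable {
 public:
  void insert(int v) {
    if (empty() && capacity_ == kSooCapacity) {
      // First insertion into an empty, SOO-eligible table: decide now whether
      // to sample. Sampling forces a heap-backed array because the sampling
      // handle lives in that allocation alongside control bytes and slots.
      if (ShouldSampleOneIn(1024)) {
        info_.emplace();
        info_->soo_capacity = kSooCapacity;
        heap_slots_.reserve(kSampledCapacity);
        capacity_ = kSampledCapacity;
      }
    }
    if (capacity_ == kSooCapacity) {
      soo_slot_ = v;  // stays inline; no heap allocation, no sampling info
    } else {
      heap_slots_.push_back(v);
      if (info_) {
        info_->capacity = capacity_;
        info_->size = heap_slots_.size();
      }
    }
  }

  bool empty() const { return !soo_slot_.has_value() && heap_slots_.empty(); }

 private:
  std::optional<int> soo_slot_;          // the single inline slot
  std::vector<int> heap_slots_;          // stand-in for the real backing array
  std::size_t capacity_ = kSooCapacity;
  std::optional<ToySamplingInfo> info_;  // present only when sampled
};

int main() {
  ToySooTable t;
  t.insert(1);  // either stays SOO or, if sampled, lands in a capacity-3 array
  return 0;
}

The trade-off this illustrates is the one the commit message calls out: a sampled table gives up SOO data locality and gets capacity 3 (the next capacity after the SOO capacity of 1) on its very first insertion, which is also why the new tests expect info->capacity == NextCapacity(SooCapacity()).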