diff options
author | Abseil Team <absl-team@google.com> | 2022-11-28 12:27:06 -0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2022-11-28 12:27:43 -0800 |
commit | e5a7979d36a84831652abf3138c4d76797c002b5 (patch) | |
tree | e7dc92ce03b1b04428b588f33bddf657ea97f554 /absl/container/internal/hashtablez_sampler.h | |
parent | e3158086978de4ff15a2058f49ba525c89e14ce6 (diff) | |
download | abseil-e5a7979d36a84831652abf3138c4d76797c002b5.tar.gz abseil-e5a7979d36a84831652abf3138c4d76797c002b5.tar.bz2 abseil-e5a7979d36a84831652abf3138c4d76797c002b5.zip |
Reduce flat_hash_{set,map} generated code size.
This CL makes a bunch of changes (mostly to raw_hash_set which
underlies flat_hash_set and flat_hash_map). Techniques used:
* Extract code that does not depend on the specific hash table type
into common (non-inlined) functions.
* Place ABSL_ATTRIBUTE_NOINLINE directives judiciously.
* Out-of-line some slow paths.
Reduces sizes of some large binaries by ~0.5%.
Has no significant performance impact on a few performance critical
binaries.
## Speed of fleetbench micro-benchmarks
Following is a histogram of %-age changes in
[fleetbench](https://github.com/google/fleetbench)
hot_swissmap_benchmark results. Negative numbers indicate a speedup
caused by this change. Statistically insignificant changes are mapped
to zero.
XXX Also run and merge in cold_swissmap_benchmark
Across all 351 benchmarks, the average speedup is 0.38%.
The best speedup was -25%, worst slowdown was +6.81%.
```
Count: 351 Average: -0.382764 StdDev: 3.77807
Min: -25 Median: 0.435135 Max: 6.81
---------------------------------------------
[ -25, -10) 16 4.558% 4.558% #
[ -9, -8) 2 0.570% 5.128%
[ -8, -7) 1 0.285% 5.413%
[ -7, -6) 1 0.285% 5.698%
[ -6, -5) 2 0.570% 6.268%
[ -5, -4) 5 1.425% 7.692%
[ -4, -3) 13 3.704% 11.396% #
[ -3, -2) 15 4.274% 15.670% #
[ -2, -1) 26 7.407% 23.077% ##
[ -1, 0) 14 3.989% 27.066% #
[ 0, 1) 185 52.707% 79.772% ############
[ 1, 2) 14 3.989% 83.761% #
[ 2, 3) 8 2.279% 86.040% #
[ 3, 4) 7 1.994% 88.034%
[ 4, 5) 32 9.117% 97.151% ##
[ 5, 6) 6 1.709% 98.860%
[ 6, 7) 4 1.140% 100.000%
```
We looked at the slowdowns and they do not seem worth worrying
about. E.g., the worst one was:
```
BM_FindHit_Hot<::absl::node_hash_set,64>/set_size:4096/density:0
2.61ns ± 1% 2.79ns ± 1% +6.81% (p=0.008 n=5+5)
```
## Detailed changes
* Out-of-line slow paths in hash table sampler methods.
* Explicitly unregister from sampler instead of from destructor.
* Introduced a non-templated CommonFields struct that holds some of
the hash table fields (infoz, ctrl, slots, size, capacity). This
struct can be passed to new non-templated helpers. The struct is
a private base class of raw_hash_set.
* Made non-inlined InitializeSlots<> that is only templated on
allocator and size/alignment of the slot type so that we can share
instantiations across types that have the same size/alignment.
* Moved some infrequently called code paths into non-inlined type-erased.
functions. Pass a suite of type-specific function pointers to these
routines for when they need to operate on slots.
* Marked some methods as non-inlined.
* Avoid unnecessary reinitialization in destructor.
* Introduce UpdateSpine type-erased helper that is called from
clear() and rehash().
PiperOrigin-RevId: 491413386
Change-Id: Ia5495c5a6ec73622a785a0d260e406ddb9085a7c
Diffstat (limited to 'absl/container/internal/hashtablez_sampler.h')
-rw-r--r-- | absl/container/internal/hashtablez_sampler.h | 54 |
1 files changed, 11 insertions, 43 deletions
diff --git a/absl/container/internal/hashtablez_sampler.h b/absl/container/internal/hashtablez_sampler.h index a89518bb..d8fd8f34 100644 --- a/absl/container/internal/hashtablez_sampler.h +++ b/absl/container/internal/hashtablez_sampler.h @@ -95,55 +95,19 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> { size_t inline_element_size; // How big is the slot? }; -inline void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length) { -#ifdef ABSL_INTERNAL_HAVE_SSE2 - total_probe_length /= 16; -#else - total_probe_length /= 8; -#endif - info->total_probe_length.store(total_probe_length, std::memory_order_relaxed); - info->num_erases.store(0, std::memory_order_relaxed); - // There is only one concurrent writer, so `load` then `store` is sufficient - // instead of using `fetch_add`. - info->num_rehashes.store( - 1 + info->num_rehashes.load(std::memory_order_relaxed), - std::memory_order_relaxed); -} +void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length); -inline void RecordReservationSlow(HashtablezInfo* info, - size_t target_capacity) { - info->max_reserve.store( - (std::max)(info->max_reserve.load(std::memory_order_relaxed), - target_capacity), - std::memory_order_relaxed); -} +void RecordReservationSlow(HashtablezInfo* info, size_t target_capacity); -inline void RecordClearedReservationSlow(HashtablezInfo* info) { - info->max_reserve.store(0, std::memory_order_relaxed); -} +void RecordClearedReservationSlow(HashtablezInfo* info); -inline void RecordStorageChangedSlow(HashtablezInfo* info, size_t size, - size_t capacity) { - info->size.store(size, std::memory_order_relaxed); - info->capacity.store(capacity, std::memory_order_relaxed); - if (size == 0) { - // This is a clear, reset the total/num_erases too. - info->total_probe_length.store(0, std::memory_order_relaxed); - info->num_erases.store(0, std::memory_order_relaxed); - } -} +void RecordStorageChangedSlow(HashtablezInfo* info, size_t size, + size_t capacity); void RecordInsertSlow(HashtablezInfo* info, size_t hash, size_t distance_from_desired); -inline void RecordEraseSlow(HashtablezInfo* info) { - info->size.fetch_sub(1, std::memory_order_relaxed); - // There is only one concurrent writer, so `load` then `store` is sufficient - // instead of using `fetch_add`. - info->num_erases.store( - 1 + info->num_erases.load(std::memory_order_relaxed), - std::memory_order_relaxed); -} +void RecordEraseSlow(HashtablezInfo* info); struct SamplingState { int64_t next_sample; @@ -165,7 +129,10 @@ class HashtablezInfoHandle { public: explicit HashtablezInfoHandle() : info_(nullptr) {} explicit HashtablezInfoHandle(HashtablezInfo* info) : info_(info) {} - ~HashtablezInfoHandle() { + + // We do not have a destructor. Caller is responsible for calling Unregister + // before destroying the handle. + void Unregister() { if (ABSL_PREDICT_TRUE(info_ == nullptr)) return; UnsampleSlow(info_); } @@ -230,6 +197,7 @@ class HashtablezInfoHandle { explicit HashtablezInfoHandle() = default; explicit HashtablezInfoHandle(std::nullptr_t) {} + inline void Unregister() {} inline void RecordStorageChanged(size_t /*size*/, size_t /*capacity*/) {} inline void RecordRehash(size_t /*total_probe_length*/) {} inline void RecordReservation(size_t /*target_capacity*/) {} |