author     Abseil Team <absl-team@google.com>            2022-11-28 12:27:06 -0800
committer  Copybara-Service <copybara-worker@google.com>  2022-11-28 12:27:43 -0800
commit     e5a7979d36a84831652abf3138c4d76797c002b5 (patch)
tree       e7dc92ce03b1b04428b588f33bddf657ea97f554 /absl/container/internal/raw_hash_set.cc
parent     e3158086978de4ff15a2058f49ba525c89e14ce6 (diff)
Reduce flat_hash_{set,map} generated code size.
This CL makes a number of changes (mostly to raw_hash_set, which
underlies flat_hash_set and flat_hash_map). Techniques used:
* Extract code that does not depend on the specific hash table type
into common (non-inlined) functions.
* Place ABSL_ATTRIBUTE_NOINLINE directives judiciously.
* Out-of-line some slow paths (see the sketch below).
Reduces the size of some large binaries by ~0.5%.
Has no significant performance impact on a few performance-critical
binaries.
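As a minimal sketch of the extraction pattern (not Abseil's actual code: `ResetCtrlBytes`, `Table`, and the `MY_NOINLINE` macro are hypothetical stand-ins for the real helpers and for ABSL_ATTRIBUTE_NOINLINE), type-independent work is hoisted into one non-inlined free function so that every `Table<T>` instantiation shares a single copy of it:
```
#include <cstddef>
#include <cstring>

// Illustrative stand-in for ABSL_ATTRIBUTE_NOINLINE.
#if defined(__GNUC__)
#define MY_NOINLINE __attribute__((noinline))
#else
#define MY_NOINLINE
#endif

// The control-byte reset does not depend on the slot type, so it is hoisted
// out of the template into a single shared, non-inlined function.
MY_NOINLINE void ResetCtrlBytes(unsigned char* ctrl, std::size_t capacity) {
  if (ctrl != nullptr) std::memset(ctrl, 0, capacity);
}

template <class Slot>
class Table {
 public:
  void clear() {
    ResetCtrlBytes(ctrl_, capacity_);  // one copy shared by all Table<T>
    size_ = 0;                         // only trivial code stays inline
  }

 private:
  unsigned char* ctrl_ = nullptr;
  std::size_t size_ = 0;
  std::size_t capacity_ = 0;
};
```
The same idea drives the InitializeSlots<> change described below: templating a helper only on the slot's size/alignment lets all slot types with the same layout share one instantiation.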
## Speed of fleetbench micro-benchmarks
The following is a histogram of percentage changes in
[fleetbench](https://github.com/google/fleetbench)
hot_swissmap_benchmark results. Negative numbers indicate a speedup
caused by this change. Statistically insignificant changes are mapped
to zero.
XXX Also run and merge in cold_swissmap_benchmark
Across all 351 benchmarks, the average speedup is 0.38%.
The best speedup was -25%; the worst slowdown was +6.81%.
```
Count: 351 Average: -0.382764 StdDev: 3.77807
Min: -25 Median: 0.435135 Max: 6.81
---------------------------------------------
[ -25, -10) 16 4.558% 4.558% #
[ -9, -8) 2 0.570% 5.128%
[ -8, -7) 1 0.285% 5.413%
[ -7, -6) 1 0.285% 5.698%
[ -6, -5) 2 0.570% 6.268%
[ -5, -4) 5 1.425% 7.692%
[ -4, -3) 13 3.704% 11.396% #
[ -3, -2) 15 4.274% 15.670% #
[ -2, -1) 26 7.407% 23.077% ##
[ -1, 0) 14 3.989% 27.066% #
[ 0, 1) 185 52.707% 79.772% ############
[ 1, 2) 14 3.989% 83.761% #
[ 2, 3) 8 2.279% 86.040% #
[ 3, 4) 7 1.994% 88.034%
[ 4, 5) 32 9.117% 97.151% ##
[ 5, 6) 6 1.709% 98.860%
[ 6, 7) 4 1.140% 100.000%
```
We looked at the slowdowns and they do not seem worth worrying
about. E.g., the worst one was:
```
BM_FindHit_Hot<::absl::node_hash_set,64>/set_size:4096/density:0
2.61ns ± 1% 2.79ns ± 1% +6.81% (p=0.008 n=5+5)
```
## Detailed changes
* Out-of-line slow paths in hash table sampler methods.
* Explicitly unregister from the sampler instead of unregistering in the destructor.
* Introduced a non-templated CommonFields struct that holds some of
the hash table fields (infoz, ctrl, slots, size, capacity). This
struct can be passed to new non-templated helpers, and it serves as
a private base class of raw_hash_set (see the sketch after this list).
* Made a non-inlined InitializeSlots<> that is templated only on the
allocator and the size/alignment of the slot type, so that
instantiations can be shared across slot types with the same
size/alignment.
* Moved some infrequently called code paths into non-inlined,
type-erased functions, passing a suite of type-specific function
pointers to these routines for when they need to operate on slots.
* Marked some methods as non-inlined.
* Avoid unnecessary reinitialization in the destructor.
* Introduce a type-erased UpdateSpine helper that is called from
clear() and rehash().
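As a deliberately simplified sketch of the CommonFields / PolicyFunctions split (the field set, the toy control-byte encoding, and `DestroyFullSlots` are hypothetical stand-ins, not the real raw_hash_set internals), the non-templated helper walks slots by byte offset and calls back through the policy's function pointers whenever it must touch a slot:
```
#include <cstddef>

// Toy control-byte encoding; the real table uses richer ctrl_t values.
constexpr unsigned char kToyFull = 1;

// Non-templated fields shared by every hash table instantiation; the real
// CommonFields also carries infoz and growth bookkeeping.
struct CommonFields {
  unsigned char* ctrl = nullptr;
  void* slots = nullptr;
  std::size_t size = 0;
  std::size_t capacity = 0;
};

// Suite of type-specific function pointers handed to type-erased helpers
// whenever they must operate on slots.
struct PolicyFunctions {
  std::size_t slot_size;
  void (*destroy_slot)(void* set, void* slot);
};

// One non-templated copy of this slow path serves all table types: it walks
// slots by byte offset and destroys full ones through the policy pointer.
void DestroyFullSlots(CommonFields& c, const PolicyFunctions& policy) {
  auto* slot = static_cast<unsigned char*>(c.slots);
  for (std::size_t i = 0; i < c.capacity; ++i, slot += policy.slot_size) {
    if (c.ctrl[i] == kToyFull) (*policy.destroy_slot)(&c, slot);
  }
  c.size = 0;
}

// The templated container derives privately from CommonFields and supplies
// its own PolicyFunctions when calling into the shared helper.
template <class Slot>
class Table : private CommonFields {
 public:
  ~Table() {
    if (ctrl != nullptr) DestroyFullSlots(*this, kPolicy);
  }

 private:
  static void DestroySlot(void* /*set*/, void* slot) {
    static_cast<Slot*>(slot)->~Slot();  // the only type-specific step
  }
  static constexpr PolicyFunctions kPolicy = {sizeof(Slot), &DestroySlot};
};
```
One compiled copy of `DestroyFullSlots` then serves every `Table<Slot>`; only the small per-type `kPolicy` tables differ.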
PiperOrigin-RevId: 491413386
Change-Id: Ia5495c5a6ec73622a785a0d260e406ddb9085a7c
Diffstat (limited to 'absl/container/internal/raw_hash_set.cc')
-rw-r--r--  absl/container/internal/raw_hash_set.cc | 152
1 file changed, 150 insertions, 2 deletions
```
diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index c63a2e02..fae12793 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -63,8 +63,156 @@ void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) {
   std::memcpy(ctrl + capacity + 1, ctrl, NumClonedBytes());
   ctrl[capacity] = ctrl_t::kSentinel;
 }
-// Extern template instantiotion for inline function.
-template FindInfo find_first_non_full(const ctrl_t*, size_t, size_t);
+// Extern template instantiation for inline function.
+template FindInfo find_first_non_full(const CommonFields&, size_t);
+
+FindInfo find_first_non_full_outofline(const CommonFields& common,
+                                       size_t hash) {
+  return find_first_non_full(common, hash);
+}
+
+// Return address of the ith slot in slots where each slot occupies slot_size.
+static inline void* SlotAddress(void* slot_array, size_t slot,
+                                size_t slot_size) {
+  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot_array) +
+                                 (slot * slot_size));
+}
+
+// Return the address of the slot just after slot assuming each slot
+// has the specified size.
+static inline void* NextSlot(void* slot, size_t slot_size) {
+  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) + slot_size);
+}
+
+// Return the address of the slot just before slot assuming each slot
+// has the specified size.
+static inline void* PrevSlot(void* slot, size_t slot_size) {
+  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size);
+}
+
+void DropDeletesWithoutResize(CommonFields& common, size_t& growth_left,
+                              const PolicyFunctions& policy, void* tmp_space) {
+  void* set = &common;
+  void* slot_array = common.slots_;
+  const size_t capacity = common.capacity_;
+  assert(IsValidCapacity(capacity));
+  assert(!is_small(capacity));
+  // Algorithm:
+  // - mark all DELETED slots as EMPTY
+  // - mark all FULL slots as DELETED
+  // - for each slot marked as DELETED
+  //     hash = Hash(element)
+  //     target = find_first_non_full(hash)
+  //     if target is in the same group
+  //       mark slot as FULL
+  //     else if target is EMPTY
+  //       transfer element to target
+  //       mark slot as EMPTY
+  //       mark target as FULL
+  //     else if target is DELETED
+  //       swap current element with target element
+  //       mark target as FULL
+  //       repeat procedure for current slot with moved from element (target)
+  ctrl_t* ctrl = common.control_;
+  ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity);
+  auto hasher = policy.hash_slot;
+  auto transfer = policy.transfer;
+  const size_t slot_size = policy.slot_size;
+
+  size_t total_probe_length = 0;
+  void* slot_ptr = SlotAddress(slot_array, 0, slot_size);
+  for (size_t i = 0; i != capacity;
+       ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) {
+    assert(slot_ptr == SlotAddress(slot_array, i, slot_size));
+    if (!IsDeleted(ctrl[i])) continue;
+    const size_t hash = (*hasher)(set, slot_ptr);
+    const FindInfo target = find_first_non_full(common, hash);
+    const size_t new_i = target.offset;
+    total_probe_length += target.probe_length;
+
+    // Verify if the old and new i fall within the same group wrt the hash.
+    // If they do, we don't need to move the object as it falls already in the
+    // best probe we can.
+    const size_t probe_offset = probe(common, hash).offset();
+    const auto probe_index = [probe_offset, capacity](size_t pos) {
+      return ((pos - probe_offset) & capacity) / Group::kWidth;
+    };
+
+    // Element doesn't move.
+    if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) {
+      SetCtrl(common, i, H2(hash), slot_size);
+      continue;
+    }
+
+    void* new_slot_ptr = SlotAddress(slot_array, new_i, slot_size);
+    if (IsEmpty(ctrl[new_i])) {
+      // Transfer element to the empty spot.
+      // SetCtrl poisons/unpoisons the slots so we have to call it at the
+      // right time.
+      SetCtrl(common, new_i, H2(hash), slot_size);
+      (*transfer)(set, new_slot_ptr, slot_ptr);
+      SetCtrl(common, i, ctrl_t::kEmpty, slot_size);
+    } else {
+      assert(IsDeleted(ctrl[new_i]));
+      SetCtrl(common, new_i, H2(hash), slot_size);
+      // Until we are done rehashing, DELETED marks previously FULL slots.
+
+      // Swap i and new_i elements.
+      (*transfer)(set, tmp_space, new_slot_ptr);
+      (*transfer)(set, new_slot_ptr, slot_ptr);
+      (*transfer)(set, slot_ptr, tmp_space);
+
+      // repeat the processing of the ith slot
+      --i;
+      slot_ptr = PrevSlot(slot_ptr, slot_size);
+    }
+  }
+  ResetGrowthLeft(common, growth_left);
+  common.infoz().RecordRehash(total_probe_length);
+}
+
+void EraseMetaOnly(CommonFields& c, size_t& growth_left, ctrl_t* it,
+                   size_t slot_size) {
+  assert(IsFull(*it) && "erasing a dangling iterator");
+  --c.size_;
+  const auto index = static_cast<size_t>(it - c.control_);
+  const size_t index_before = (index - Group::kWidth) & c.capacity_;
+  const auto empty_after = Group(it).MaskEmpty();
+  const auto empty_before = Group(c.control_ + index_before).MaskEmpty();
+
+  // We count how many consecutive non empties we have to the right and to the
+  // left of `it`. If the sum is >= kWidth then there is at least one probe
+  // window that might have seen a full group.
+  bool was_never_full =
+      empty_before && empty_after &&
+      static_cast<size_t>(empty_after.TrailingZeros() +
+                          empty_before.LeadingZeros()) < Group::kWidth;
+
+  SetCtrl(c, index, was_never_full ? ctrl_t::kEmpty : ctrl_t::kDeleted,
+          slot_size);
+  growth_left += (was_never_full ? 1 : 0);
+  c.infoz().RecordErase();
+}
+
+void ClearBackingArray(CommonFields& c, size_t& growth_left,
+                       const PolicyFunctions& policy, bool reuse) {
+  if (reuse) {
+    c.size_ = 0;
+    ResetCtrl(c, growth_left, policy.slot_size);
+    c.infoz().RecordStorageChanged(0, c.capacity_);
+  } else {
+    void* set = &c;
+    (*policy.dealloc)(set, policy, c.control_, c.slots_, c.capacity_);
+    c.control_ = EmptyGroup();
+    c.slots_ = nullptr;
+    c.size_ = 0;
+    c.capacity_ = 0;
+    growth_left = 0;
+    c.infoz().RecordClearedReservation();
+    assert(c.size_ == 0);
+    c.infoz().RecordStorageChanged(0, 0);
+  }
+}
 
 }  // namespace container_internal
 ABSL_NAMESPACE_END
```