aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAbseil Team <absl-team@google.com>2023-10-12 11:29:12 -0700
committerCopybara-Service <copybara-worker@google.com>2023-10-12 11:31:38 -0700
commit9fb8a3886d3539d939c2ba5c22f113b1e6a26ae9 (patch)
treec0c2e4190fed28a87997f20c0df260f158ea89c6
parent5a7fca7de1a0285a0887030a346d886b8978a03c (diff)
downloadabseil-9fb8a3886d3539d939c2ba5c22f113b1e6a26ae9.tar.gz
abseil-9fb8a3886d3539d939c2ba5c22f113b1e6a26ae9.tar.bz2
abseil-9fb8a3886d3539d939c2ba5c22f113b1e6a26ae9.zip
The current implementation of control by checking on x86 has an unnecessary sign extension after the doing the control byte comparison. Changing the bitmask object
to explicitly track only 16 bits (instead of 32) eliminates this, saving an instruction / cycle. This speeds up hit checking by up to 6% on Milan and up to 15% on CLX PiperOrigin-RevId: 572965182 Change-Id: Ifda0e3250d409266d6dcef89cba6ada91d879291
-rw-r--r--absl/container/internal/raw_hash_set.h26
1 files changed, 15 insertions, 11 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index ad6e290a..9eee4fc7 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -408,7 +408,9 @@ class NonIterableBitMask {
uint32_t LeadingZeros() const {
constexpr int total_significant_bits = SignificantBits << Shift;
constexpr int extra_bits = sizeof(T) * 8 - total_significant_bits;
- return static_cast<uint32_t>(countl_zero(mask_ << extra_bits)) >> Shift;
+ return static_cast<uint32_t>(
+ countl_zero(static_cast<T>(mask_ << extra_bits))) >>
+ Shift;
}
T mask_;
@@ -614,29 +616,31 @@ struct GroupSse2Impl {
}
// Returns a bitmask representing the positions of slots that match hash.
- BitMask<uint32_t, kWidth> Match(h2_t hash) const {
+ BitMask<uint16_t, kWidth> Match(h2_t hash) const {
auto match = _mm_set1_epi8(static_cast<char>(hash));
- return BitMask<uint32_t, kWidth>(
- static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl))));
+ BitMask<uint16_t, kWidth> result = BitMask<uint16_t, kWidth>(0);
+ result = BitMask<uint16_t, kWidth>(
+ static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl))));
+ return result;
}
// Returns a bitmask representing the positions of empty slots.
- NonIterableBitMask<uint32_t, kWidth> MaskEmpty() const {
+ NonIterableBitMask<uint16_t, kWidth> MaskEmpty() const {
#ifdef ABSL_INTERNAL_HAVE_SSSE3
// This only works because ctrl_t::kEmpty is -128.
- return NonIterableBitMask<uint32_t, kWidth>(
- static_cast<uint32_t>(_mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl))));
+ return NonIterableBitMask<uint16_t, kWidth>(
+ static_cast<uint16_t>(_mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl))));
#else
auto match = _mm_set1_epi8(static_cast<char>(ctrl_t::kEmpty));
- return NonIterableBitMask<uint32_t, kWidth>(
- static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl))));
+ return NonIterableBitMask<uint16_t, kWidth>(
+ static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl))));
#endif
}
// Returns a bitmask representing the positions of empty or deleted slots.
- NonIterableBitMask<uint32_t, kWidth> MaskEmptyOrDeleted() const {
+ NonIterableBitMask<uint16_t, kWidth> MaskEmptyOrDeleted() const {
auto special = _mm_set1_epi8(static_cast<char>(ctrl_t::kSentinel));
- return NonIterableBitMask<uint32_t, kWidth>(static_cast<uint32_t>(
+ return NonIterableBitMask<uint16_t, kWidth>(static_cast<uint16_t>(
_mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl))));
}