diff options
Diffstat (limited to 'absl/hash')
-rw-r--r-- | absl/hash/BUILD.bazel | 2 | ||||
-rw-r--r-- | absl/hash/CMakeLists.txt | 10 | ||||
-rw-r--r-- | absl/hash/hash_test.cc | 20 | ||||
-rw-r--r-- | absl/hash/internal/city.cc | 20 | ||||
-rw-r--r-- | absl/hash/internal/hash.h | 22 | ||||
-rw-r--r-- | absl/hash/internal/low_level_hash.cc | 17 | ||||
-rw-r--r-- | absl/hash/internal/low_level_hash_test.cc | 48 |
7 files changed, 51 insertions, 88 deletions
diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index bcc316f9..a0db919b 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -43,6 +43,7 @@ cc_library( "//absl/container:fixed_array", "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/numeric:bits", "//absl/numeric:int128", "//absl/strings", "//absl/types:optional", @@ -157,7 +158,6 @@ cc_library( deps = [ "//absl/base:config", "//absl/base:endian", - "//absl/numeric:bits", "//absl/numeric:int128", ], ) diff --git a/absl/hash/CMakeLists.txt b/absl/hash/CMakeLists.txt index 423b74b5..f99f35bc 100644 --- a/absl/hash/CMakeLists.txt +++ b/absl/hash/CMakeLists.txt @@ -24,7 +24,8 @@ absl_cc_library( "internal/hash.h" COPTS ${ABSL_DEFAULT_COPTS} - DEPS + DEPS + absl::bits absl::city absl::config absl::core_headers @@ -55,6 +56,7 @@ absl_cc_library( absl::variant GTest::gmock TESTONLY + PUBLIC ) absl_cc_test( @@ -81,6 +83,10 @@ absl_cc_test( ) # Internal-only target, do not depend on directly. +# +# Note: Even though external code should not depend on this target +# directly, it must be marked PUBLIC since it is a dependency of +# hash_testing. absl_cc_library( NAME spy_hash_state @@ -93,6 +99,7 @@ absl_cc_library( absl::strings absl::str_format TESTONLY + PUBLIC ) # Internal-only target, do not depend on directly. @@ -134,7 +141,6 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::bits absl::config absl::endian absl::int128 diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index ffa45e6e..5b556180 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -222,7 +222,7 @@ TEST(HashValueTest, PointerAlignment) { // Limit the scope to the bits we would be using for Swisstable. constexpr size_t kMask = (1 << (kLog2NumValues + 7)) - 1; size_t stuck_bits = (~bits_or | bits_and) & kMask; - EXPECT_EQ(stuck_bits, 0) << "0x" << std::hex << stuck_bits; + EXPECT_EQ(stuck_bits, 0u) << "0x" << std::hex << stuck_bits; } } @@ -737,10 +737,10 @@ TEST(HashValueTest, CombinePiecewiseBuffer) { // // This test is run on a buffer that is a multiple of the stride size, and one // that isn't. - for (size_t big_buffer_size : {1024 * 2 + 512, 1024 * 3}) { + for (size_t big_buffer_size : {1024u * 2 + 512u, 1024u * 3}) { SCOPED_TRACE(big_buffer_size); std::string big_buffer; - for (int i = 0; i < big_buffer_size; ++i) { + for (size_t i = 0; i < big_buffer_size; ++i) { // Arbitrary string big_buffer.push_back(32 + (i * (i / 3)) % 64); } @@ -904,8 +904,8 @@ TEST(IsHashableTest, PoisonHash) { EXPECT_FALSE(absl::is_copy_assignable<absl::Hash<X>>::value); EXPECT_FALSE(absl::is_move_assignable<absl::Hash<X>>::value); EXPECT_FALSE(IsHashCallable<X>::value); -#if !defined(__GNUC__) || __GNUC__ < 9 - // This doesn't compile on GCC 9. +#if !defined(__GNUC__) || defined(__clang__) + // TODO(b/144368551): As of GCC 8.4 this does not compile. EXPECT_FALSE(IsAggregateInitializable<absl::Hash<X>>::value); #endif } @@ -1135,10 +1135,10 @@ TEST(HashTest, HashNonUniquelyRepresentedType) { unsigned char buffer2[kNumStructs * sizeof(StructWithPadding)]; std::memset(buffer2, 255, sizeof(buffer2)); auto* s2 = reinterpret_cast<StructWithPadding*>(buffer2); - for (int i = 0; i < kNumStructs; ++i) { + for (size_t i = 0; i < kNumStructs; ++i) { SCOPED_TRACE(i); - s1[i].c = s2[i].c = '0' + i; - s1[i].i = s2[i].i = i; + s1[i].c = s2[i].c = static_cast<char>('0' + i); + s1[i].i = s2[i].i = static_cast<int>(i); ASSERT_FALSE(memcmp(buffer1 + i * sizeof(StructWithPadding), buffer2 + i * sizeof(StructWithPadding), sizeof(StructWithPadding)) == 0) @@ -1226,7 +1226,9 @@ struct ValueWithBoolConversion { namespace std { template <> struct hash<ValueWithBoolConversion> { - size_t operator()(ValueWithBoolConversion v) { return v.i; } + size_t operator()(ValueWithBoolConversion v) { + return static_cast<size_t>(v.i); + } }; } // namespace std diff --git a/absl/hash/internal/city.cc b/absl/hash/internal/city.cc index 5460134e..f0d31964 100644 --- a/absl/hash/internal/city.cc +++ b/absl/hash/internal/city.cc @@ -97,7 +97,7 @@ static uint32_t Hash32Len13to24(const char *s, size_t len) { uint32_t d = Fetch32(s + (len >> 1)); uint32_t e = Fetch32(s); uint32_t f = Fetch32(s + len - 4); - uint32_t h = len; + uint32_t h = static_cast<uint32_t>(len); return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); } @@ -106,15 +106,15 @@ static uint32_t Hash32Len0to4(const char *s, size_t len) { uint32_t b = 0; uint32_t c = 9; for (size_t i = 0; i < len; i++) { - signed char v = s[i]; - b = b * c1 + v; + signed char v = static_cast<signed char>(s[i]); + b = b * c1 + static_cast<uint32_t>(v); c ^= b; } - return fmix(Mur(b, Mur(len, c))); + return fmix(Mur(b, Mur(static_cast<uint32_t>(len), c))); } static uint32_t Hash32Len5to12(const char *s, size_t len) { - uint32_t a = len, b = len * 5, c = 9, d = b; + uint32_t a = static_cast<uint32_t>(len), b = a * 5, c = 9, d = b; a += Fetch32(s); b += Fetch32(s + len - 4); c += Fetch32(s + ((len >> 1) & 4)); @@ -129,7 +129,7 @@ uint32_t CityHash32(const char *s, size_t len) { } // len > 24 - uint32_t h = len, g = c1 * len, f = g; + uint32_t h = static_cast<uint32_t>(len), g = c1 * h, f = g; uint32_t a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2; uint32_t a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2; @@ -230,11 +230,11 @@ static uint64_t HashLen0to16(const char *s, size_t len) { return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); } if (len > 0) { - uint8_t a = s[0]; - uint8_t b = s[len >> 1]; - uint8_t c = s[len - 1]; + uint8_t a = static_cast<uint8_t>(s[0]); + uint8_t b = static_cast<uint8_t>(s[len >> 1]); + uint8_t c = static_cast<uint8_t>(s[len - 1]); uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8); - uint32_t z = len + (static_cast<uint32_t>(c) << 2); + uint32_t z = static_cast<uint32_t>(len) + (static_cast<uint32_t>(c) << 2); return ShiftMix(y * k2 ^ z * k0) * k2; } return k2; diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 45dfdd46..ccf4cc1a 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -49,6 +49,7 @@ #include "absl/hash/internal/city.h" #include "absl/hash/internal/low_level_hash.h" #include "absl/meta/type_traits.h" +#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" @@ -444,7 +445,7 @@ H AbslHashValue(H hash_state, T C::* ptr) { // On other platforms, we assume that pointers-to-members do not have // padding. #ifdef __cpp_lib_has_unique_object_representations - static_assert(std::has_unique_object_representations_v<T C::*>); + static_assert(std::has_unique_object_representations<T C::*>::value); #endif // __cpp_lib_has_unique_object_representations return n; #endif @@ -1052,7 +1053,7 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> { uint64_t most_significant = low_mem; uint64_t least_significant = high_mem; #endif - return {least_significant, most_significant >> (128 - len * 8)}; + return {least_significant, most_significant}; } // Reads 4 to 8 bytes from p. Zero pads to fill uint64_t. @@ -1183,9 +1184,22 @@ inline uint64_t MixingHashState::CombineContiguousImpl( } v = Hash64(first, len); } else if (len > 8) { + // This hash function was constructed by the ML-driven algorithm discovery + // using reinforcement learning. We fed the agent lots of inputs from + // microbenchmarks, SMHasher, low hamming distance from generated inputs and + // picked up the one that was good on micro and macrobenchmarks. auto p = Read9To16(first, len); - state = Mix(state, p.first); - v = p.second; + uint64_t lo = p.first; + uint64_t hi = p.second; + // Rotation by 53 was found to be most often useful when discovering these + // hashing algorithms with ML techniques. + lo = absl::rotr(lo, 53); + state += kMul; + lo += state; + state ^= hi; + uint128 m = state; + m *= lo; + return static_cast<uint64_t>(m ^ (m >> 64)); } else if (len >= 4) { v = Read4To8(first, len); } else if (len > 0) { diff --git a/absl/hash/internal/low_level_hash.cc b/absl/hash/internal/low_level_hash.cc index 6f9cb9c7..c917457a 100644 --- a/absl/hash/internal/low_level_hash.cc +++ b/absl/hash/internal/low_level_hash.cc @@ -15,7 +15,6 @@ #include "absl/hash/internal/low_level_hash.h" #include "absl/base/internal/unaligned_access.h" -#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" namespace absl { @@ -23,24 +22,13 @@ ABSL_NAMESPACE_BEGIN namespace hash_internal { static uint64_t Mix(uint64_t v0, uint64_t v1) { -#if !defined(__aarch64__) - // The default bit-mixer uses 64x64->128-bit multiplication. absl::uint128 p = v0; p *= v1; return absl::Uint128Low64(p) ^ absl::Uint128High64(p); -#else - // The default bit-mixer above would perform poorly on some ARM microarchs, - // where calculating a 128-bit product requires a sequence of two - // instructions with a high combined latency and poor throughput. - // Instead, we mix bits using only 64-bit arithmetic, which is faster. - uint64_t p = v0 ^ absl::rotl(v1, 40); - p *= v1 ^ absl::rotl(v0, 39); - return p ^ (p >> 11); -#endif } uint64_t LowLevelHash(const void* data, size_t len, uint64_t seed, - const uint64_t salt[]) { + const uint64_t salt[5]) { const uint8_t* ptr = static_cast<const uint8_t*>(data); uint64_t starting_length = static_cast<uint64_t>(len); uint64_t current_state = seed ^ salt[0]; @@ -106,7 +94,8 @@ uint64_t LowLevelHash(const void* data, size_t len, uint64_t seed, } else if (len > 0) { // If we have at least 1 and at most 3 bytes, read all of the provided // bits into A, with some adjustments. - a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]); + a = static_cast<uint64_t>((ptr[0] << 16) | (ptr[len >> 1] << 8) | + ptr[len - 1]); b = 0; } else { a = 0; diff --git a/absl/hash/internal/low_level_hash_test.cc b/absl/hash/internal/low_level_hash_test.cc index ae930b34..589a3d8f 100644 --- a/absl/hash/internal/low_level_hash_test.cc +++ b/absl/hash/internal/low_level_hash_test.cc @@ -452,54 +452,6 @@ TEST(LowLevelHashTest, VerifyGolden) { 0xdd497891465a2cc1, 0x6f1fe8c57a33072e, 0x2c9f4ec078c460c0, 0x9a725bde8f6a1437, 0x6ce545fa3ef61e4d, }; -#elif defined(__aarch64__) - constexpr uint64_t kGolden[kNumGoldenOutputs] = { - 0x45c0aadee165dcbe, 0x25ed8587f6f20d06, 0x5f23ae668ce7926d, - 0xfef74d1da0846719, 0x54478408e68cb7d4, 0xee27ddaf88c6fe68, - 0xb7ac7031e81867ca, 0xf1168f818ec6c36d, 0x1dd0b734a83b019a, - 0xd6ae30d4142b54fe, 0xcd860c721ccb80fb, 0x068acf8493794756, - 0xd4ada0be58681307, 0x13ffe0f64ca540ed, 0xffc1d7a3401aec02, - 0xd81c4d865cf95fb9, 0x1dd0793acede62e0, 0xa6722abbca8fe4cf, - 0x5453d3e4111a7e40, 0xf29b3e3204c9dcd2, 0x23be2980e43117f7, - 0x74e2ccbc286f08eb, 0x19ef7c0f9496003a, 0xbfbf1c3e49b27987, - 0x6e6c179eb4a82c70, 0x07f4e184216bc4fc, 0xf17fbc4254927554, - 0xe57696b70a45b1b6, 0x6d3b144631b320e8, 0xccf8729792c75a2d, - 0xe832495b41fa980b, 0x5c96cfdc7b227d34, 0xc4dca234ef4e43f4, - 0x5fc801abf9abe307, 0xe41e3c5076d88f4d, 0x522346200ddec3c3, - 0x72bed1946fd7aaa4, 0x0ac1f84dcc335f96, 0x3af78db5e0a47670, - 0x6100ebf1481f1caf, 0xf5fd10037fc651a3, 0xa01227d8944665f3, - 0x7217681c4bbc9420, 0x4adee538e3eb10d1, 0x35e1761ad96de9a7, - 0x8b370aef9613bfba, 0x824506f749eeaf59, 0x85e805fa04423991, - 0xb61e9c33283c3de7, 0xc79721bbcb039ed6, 0x04e1c19a3a1e6639, - 0x6aaf6346b68dd638, 0x601a4b496be6d0c4, 0x3ece355f91c41787, - 0xd2fc8998448d7888, 0xd7529804f843efa9, 0xabdcc38a288536aa, - 0xdd323e48a9718648, 0x2090279c0030a52a, 0xe2f90faca88a3cd1, - 0x3e0c4e92fc50e4aa, 0xa26d308798e801dd, 0x432eefeedee8c02e, - 0xca4ce494614b77df, 0xbba82911e838066d, 0x4b00821016adee4b, - 0x4cf6e526dfb5a20f, 0x5b8466495142cba2, 0xe28ac1406e88a68c, - 0x8511e5f9d3100999, 0x05acbfe02798890b, 0x74c249c7ce4a8425, - 0xdbe7468d09bc34bc, 0x11079ab10e3b9b58, 0xb7788dec9032035a, - 0xb7e8daa786513f80, 0x34c3288831f46b45, 0x014cce5f0c21ecc6, - 0xc6a8f7b024551a28, 0x49784e902e207fd8, 0x4720d32af0b55158, - 0x8df3ec5de0c1da00, 0xf4db677b2c9e6853, 0xaa419abea78d312d, - 0x181e0f91bd757443, 0xa8c45136fada083b, 0x91303b93f5f0582c, - 0x883b95c6ddc62a08, 0x93186a8875fe952b, 0xd94f533928e957e2, - 0x6ba343003e10c172, 0xc8623b620c715d6a, 0x8ca0c512e180e244, - 0xdc9b74c2536b6216, 0x8eb5fdc61b295d96, 0x2ad83966b37c95ba, - 0xb90bf154ac5edec9, 0x902cf847b326cfb3, 0x7b02d0c0ca7808ca, - 0x492f310d003ea15f, 0x3eb6497a47c95990, 0x5d46b0ced31428b7, - 0x081afa67d1986157, 0x043482ec286b20eb, 0xc103c8f18c1a2a53, - 0xe8e9995a81481e83, 0x6bb3295822bc90b5, 0xeec75297a3fa5672, - 0x591c8440c4857412, 0x74947f455aaf24ad, 0xcf0e571586ec77a9, - 0x0c2553ea8c0400ad, 0x380219118066255f, 0x7595adb88b15ebe2, - 0xb33c00696c64ae23, 0xa143516ddd7c9857, 0x39179af229248d26, - 0x65d387a6f2ee2079, 0x89f8a9b21cd2f195, 0xbfef032d25df92e6, - 0x6b7e18a36c69da71, 0x4b3b15f6c28974e6, 0x032a75917f6c544c, - 0xe3b97ecca6d287cd, 0xa4a563110d3cda81, 0x35e09e8134f4e7f1, - 0xc9419dd03a9a390e, 0x7b86fae9000fd329, 0x1e044f8d54fe74c3, - 0x9c4991d7a47e9666, 0xfb485f3a1df4fdb6, 0xb11519969eeb94ff, - 0x3224ea1c44caeb8d, 0x86570bbd7cc6b80d, - }; #else constexpr uint64_t kGolden[kNumGoldenOutputs] = { 0xe5a40d39ab796423, 0x1766974bf7527d81, 0x5c3bbbe230db17a8, |