diff options
author | Abseil Team <absl-team@google.com> | 2022-01-19 11:04:50 -0800 |
---|---|---|
committer | dinord <dino.radakovich@gmail.com> | 2022-01-19 14:36:25 -0500 |
commit | fbbb5865a562c9a9167d71c1cf56b82025a8f065 (patch) | |
tree | 212c8694bb6cf1d76af2b4aabac02c4910f6beab /absl/hash/internal/hash.h | |
parent | 9bb42857112ad13b23de4333fbb75eb47db9de95 (diff) | |
download | abseil-fbbb5865a562c9a9167d71c1cf56b82025a8f065.tar.gz abseil-fbbb5865a562c9a9167d71c1cf56b82025a8f065.tar.bz2 abseil-fbbb5865a562c9a9167d71c1cf56b82025a8f065.zip |
Export of internal Abseil changes
--
487c7a754a3b93bc0f9de14bdced48007a96ae55 by Greg Falcon <gfalcon@google.com>:
Add support for absl::Hash to hash unordered containers. These can now be hashed directly, as well as combined in AbslHashValue implementations.
This also adds a new method, `H::combine_unordered()`, to the public AbslHashValue hash state API. This allows users to implement hash specializations for their own unordered collection types.
A traits class, `H::is_hashable<T>`, is also added to the hash state API. H::is_hashable<T>::value reflects whether type T is considered hashable by the AbslHashValue framework. This allows users to properly SFINAE templated versions of AbslHashValue. (The AbslHashValue implementation added to raw_hash_set shows an example of its use.)
PiperOrigin-RevId: 422856706
GitOrigin-RevId: 487c7a754a3b93bc0f9de14bdced48007a96ae55
Change-Id: Id31fd4ccba282f8c9ae6fcee6ae0ad0f7879f456
Diffstat (limited to 'absl/hash/internal/hash.h')
-rw-r--r-- | absl/hash/internal/hash.h | 180 |
1 files changed, 166 insertions, 14 deletions
diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 97b68bad..a424e014 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -23,6 +23,7 @@ #include <array> #include <bitset> #include <cmath> +#include <cstddef> #include <cstring> #include <deque> #include <forward_list> @@ -36,6 +37,8 @@ #include <string> #include <tuple> #include <type_traits> +#include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> @@ -54,6 +57,9 @@ namespace absl { ABSL_NAMESPACE_BEGIN + +class HashState; + namespace hash_internal { // Internal detail: Large buffers are hashed in smaller chunks. This function @@ -115,24 +121,66 @@ class PiecewiseCombiner { size_t position_; }; +// is_hashable() +// +// Trait class which returns true if T is hashable by the absl::Hash framework. +// Used for the AbslHashValue implementations for composite types below. +template <typename T> +struct is_hashable; + // HashStateBase // -// A hash state object represents an intermediate state in the computation -// of an unspecified hash algorithm. `HashStateBase` provides a CRTP style -// base class for hash state implementations. Developers adding type support -// for `absl::Hash` should not rely on any parts of the state object other than -// the following member functions: +// An internal implementation detail that contains common implementation details +// for all of the "hash state objects" objects generated by Abseil. This is not +// a public API; users should not create classes that inherit from this. +// +// A hash state object is the template argument `H` passed to `AbslHashValue`. +// It represents an intermediate state in the computation of an unspecified hash +// algorithm. `HashStateBase` provides a CRTP style base class for hash state +// implementations. Developers adding type support for `absl::Hash` should not +// rely on any parts of the state object other than the following member +// functions: // // * HashStateBase::combine() // * HashStateBase::combine_contiguous() +// * HashStateBase::combine_unordered() // -// A derived hash state class of type `H` must provide a static member function +// A derived hash state class of type `H` must provide a public member function // with a signature similar to the following: // // `static H combine_contiguous(H state, const unsigned char*, size_t)`. // +// It must also provide a private template method named RunCombineUnordered. +// +// A "consumer" is a 1-arg functor returning void. Its argument is a reference +// to an inner hash state object, and it may be called multiple times. When +// called, the functor consumes the entropy from the provided state object, +// and resets that object to its empty state. +// +// A "combiner" is a stateless 2-arg functor returning void. Its arguments are +// an inner hash state object and an ElementStateConsumer functor. A combiner +// uses the provided inner hash state object to hash each element of the +// container, passing the inner hash state object to the consumer after hashing +// each element. +// +// Given these definitions, a derived hash state class of type H +// must provide a private template method with a signature similar to the +// following: +// +// `template <typename CombinerT>` +// `static H RunCombineUnordered(H outer_state, CombinerT combiner)` +// +// This function is responsible for constructing the inner state object and +// providing a consumer to the combiner. It uses side effects of the consumer +// and combiner to mix the state of each element in an order-independent manner, +// and uses this to return an updated value of `outer_state`. +// +// This inside-out approach generates efficient object code in the normal case, +// but allows us to use stack storage to implement the absl::HashState type +// erasure mechanism (avoiding heap allocations while hashing). +// // `HashStateBase` will provide a complete implementation for a hash state -// object in terms of this method. +// object in terms of these two methods. // // Example: // @@ -141,6 +189,10 @@ class PiecewiseCombiner { // static H combine_contiguous(H state, const unsigned char*, size_t); // using MyHashState::HashStateBase::combine; // using MyHashState::HashStateBase::combine_contiguous; +// using MyHashState::HashStateBase::combine_unordered; +// private: +// template <typename CombinerT> +// static H RunCombineUnordered(H state, CombinerT combiner); // }; template <typename H> class HashStateBase { @@ -181,7 +233,30 @@ class HashStateBase { template <typename T> static H combine_contiguous(H state, const T* data, size_t size); + template <typename I> + static H combine_unordered(H state, I begin, I end); + using AbslInternalPiecewiseCombiner = PiecewiseCombiner; + + template <typename T> + using is_hashable = absl::hash_internal::is_hashable<T>; + + private: + // Common implementation of the iteration step of a "combiner", as described + // above. + template <typename I> + struct CombineUnorderedCallback { + I begin; + I end; + + template <typename InnerH, typename ElementStateConsumer> + void operator()(InnerH inner_state, ElementStateConsumer cb) { + for (; begin != end; ++begin) { + inner_state = H::combine(std::move(inner_state), *begin); + cb(inner_state); + } + } + }; }; // is_uniquely_represented @@ -350,13 +425,6 @@ H AbslHashValue(H hash_state, std::nullptr_t) { // AbslHashValue for Composite Types // ----------------------------------------------------------------------------- -// is_hashable() -// -// Trait class which returns true if T is hashable by the absl::Hash framework. -// Used for the AbslHashValue implementations for composite types below. -template <typename T> -struct is_hashable; - // AbslHashValue() for hashing pairs template <typename H, typename T1, typename T2> typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value, @@ -503,8 +571,10 @@ AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) { } // AbslHashValue special cases for hashing std::vector<bool> + #if defined(ABSL_IS_BIG_ENDIAN) && \ (defined(__GLIBCXX__) || defined(__GLIBCPP__)) + // std::hash in libstdc++ does not work correctly with vector<bool> on Big // Endian platforms therefore we need to implement a custom AbslHashValue for // it. More details on the bug: @@ -588,6 +658,55 @@ typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( } // ----------------------------------------------------------------------------- +// AbslHashValue for Unordered Associative Containers +// ----------------------------------------------------------------------------- + +// AbslHashValue for hashing std::unordered_set +template <typename H, typename Key, typename Hash, typename KeyEqual, + typename Alloc> +typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( + H hash_state, const std::unordered_set<Key, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_multiset +template <typename H, typename Key, typename Hash, typename KeyEqual, + typename Alloc> +typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue( + H hash_state, + const std::unordered_multiset<Key, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_set +template <typename H, typename Key, typename T, typename Hash, + typename KeyEqual, typename Alloc> +typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, + H>::type +AbslHashValue(H hash_state, + const std::unordered_map<Key, T, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// AbslHashValue for hashing std::unordered_multiset +template <typename H, typename Key, typename T, typename Hash, + typename KeyEqual, typename Alloc> +typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value, + H>::type +AbslHashValue(H hash_state, + const std::unordered_multimap<Key, T, Hash, KeyEqual, Alloc>& s) { + return H::combine( + H::combine_unordered(std::move(hash_state), s.begin(), s.end()), + s.size()); +} + +// ----------------------------------------------------------------------------- // AbslHashValue for Wrapper Types // ----------------------------------------------------------------------------- @@ -830,6 +949,31 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> { // move-only ensures that there is only one non-moved-from object. MixingHashState() : state_(Seed()) {} + friend class MixingHashState::HashStateBase; + + template <typename CombinerT> + static MixingHashState RunCombineUnordered(MixingHashState state, + CombinerT combiner) { + uint64_t unordered_state = 0; + combiner(MixingHashState{}, [&](MixingHashState& inner_state) { + // Add the hash state of the element to the running total, but mix the + // carry bit back into the low bit. This in intended to avoid losing + // entropy to overflow, especially when unordered_multisets contain + // multiple copies of the same value. + auto element_state = inner_state.state_; + unordered_state += element_state; + if (unordered_state < element_state) { + ++unordered_state; + } + inner_state = MixingHashState{}; + }); + return MixingHashState::combine(std::move(state), unordered_state); + } + + // Allow the HashState type-erasure implementation to invoke + // RunCombinedUnordered() directly. + friend class absl::HashState; + // Workaround for MSVC bug. // We make the type copyable to fix the calling convention, even though we // never actually copy it. Keep it private to not affect the public API of the @@ -1064,6 +1208,14 @@ H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) { return hash_internal::hash_range_or_bytes(std::move(state), data, size); } +// HashStateBase::combine_unordered() +template <typename H> +template <typename I> +H HashStateBase<H>::combine_unordered(H state, I begin, I end) { + return H::RunCombineUnordered(std::move(state), + CombineUnorderedCallback<I>{begin, end}); +} + // HashStateBase::PiecewiseCombiner::add_buffer() template <typename H> H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, |