Commit e06814f9 authored by Xiao Shi's avatar Xiao Shi Committed by Facebook Github Bot

implement `commutative_hash_combine_*` for unordered containers

Summary:
`hash_range(c.begin(), c.end())` combines hashes of individual elements of a
container in an ordered manner. This diff provides the equivalent for
unordered containers.

Unlike `hash_range`, `commutative_hash_combine_range` defaults to `folly::Hash`
as its hasher; it mixes the individual hash if the `hasher` is not deemed
avalanching.

It uses a commutative accumulator described in this paper:
https://www.preprints.org/manuscript/201710.0192/v1/download
In the experiments in the paper, the symmetric polynomial yielded a better
spread of hash values and lower collision rates than `+` or `xor`.

Reviewed By: yfeldblum

Differential Revision: D9688687

fbshipit-source-id: c812b25975a53a868d98f78645146cb8bdbb5c32
parent 34b79e43
......@@ -559,7 +559,10 @@ class StdHasher {
// hashable objects. hash_combine_generic takes a class Hasher implementing
// hash<T>; hash_combine uses a default hasher StdHasher that uses std::hash.
// hash_combine_generic hashes each argument and combines those hashes in
// an order-dependent way to yield a new hash.
// an order-dependent way to yield a new hash; hash_range does so (also in an
// order-dependent way) for items in the range [first, last);
// commutative_hash_combine_* hashes values but combines them in an
// order-independent way to yield a new hash.
// This is the Hash128to64 function from Google's cityhash (available
// under the MIT License). We use it to reduce multiple 64 bit hashes
......@@ -577,6 +580,23 @@ inline uint64_t hash_128_to_64(
return b;
}
// Folds the hash of `value` into `seed` with an accumulator that is symmetric
// in its two inputs, so the order in which values are folded in does not
// affect the result.
//
// The raw result of `hasher(value)` is used directly when
// IsAvalanchingHasher<Hash, Value> reports the hasher as avalanching;
// otherwise it is first passed through twang_mix64.
template <class Hash, class Value>
uint64_t commutative_hash_combine_value_generic(
    uint64_t seed,
    Hash const& hasher,
    Value const& value) {
  auto const rawHash = hasher(value);
  auto const mixedHash =
      IsAvalanchingHasher<Hash, Value>::value ? rawHash : twang_mix64(rawHash);
  // Commutative accumulator (a symmetric polynomial in seed and mixedHash)
  // taken from this paper:
  // https://www.preprints.org/manuscript/201710.0192/v1/download
  auto const linearTerm = (seed + mixedHash) * 2779;
  auto const productTerm = seed * mixedHash * 2;
  return 3860031 + linearTerm + productTerm;
}
// hash_range combines hashes of items in the range [first, last) in an
// __order-dependent__ fashion. To hash an unordered container (e.g.,
// folly::dynamic, hash tables like std::unordered_map), use
// commutative_hash_combine_range instead, which combines hashes of items
// independent of ordering.
template <
class Iter,
class Hash = std::hash<typename std::iterator_traits<Iter>::value_type>>
......@@ -588,6 +608,23 @@ hash_range(Iter begin, Iter end, uint64_t hash = 0, Hash hasher = Hash()) {
return hash;
}
// Folds the hash of every element in [first, last) into `seed`, one element
// at a time, via commutative_hash_combine_value_generic, and returns the
// accumulated value. An empty range returns `seed` unchanged.
template <class Hash, class Iter>
uint64_t commutative_hash_combine_range_generic(
    uint64_t seed,
    Hash const& hasher,
    Iter first,
    Iter last) {
  for (; first != last; ++first) {
    seed = commutative_hash_combine_value_generic(seed, hasher, *first);
  }
  return seed;
}
// Convenience overload of commutative_hash_combine_range_generic: hashes the
// elements of [first, last) with a default-constructed Hash, starting from a
// seed of zero.
template <class Iter>
uint64_t commutative_hash_combine_range(Iter first, Iter last) {
  uint64_t const initialSeed = 0;
  return commutative_hash_combine_range_generic(
      initialSeed, Hash{}, first, last);
}
namespace detail {
// Alias for an array of size_t with an unspecified bound.
// NOTE(review): presumably used to build a throwaway array for parameter-pack
// expansion; its users are not visible in this view -- confirm.
using c_array_size_t = size_t[];
} // namespace detail
......@@ -616,12 +653,28 @@ size_t hash_combine_generic(
}
}
// Folds the hash of each argument in `value...` into `seed` using
// commutative_hash_combine_value_generic and returns the accumulated result.
// With an empty pack, `seed` is returned unchanged.
template <typename Hash, typename... Value>
uint64_t commutative_hash_combine_generic(
    uint64_t seed,
    Hash const& hasher,
    Value const&... value) {
  // Pack-expansion "foreach": each initializer updates `seed` in turn. The
  // leading 0 keeps the array non-empty when the pack is empty; the array
  // contents themselves are never read.
  uint64_t const expansionSink[] = {
      0,
      (seed = commutative_hash_combine_value_generic(seed, hasher, value))...};
  static_cast<void>(expansionSink);
  return seed;
}
// Combines the hashes of `t` and `ts...` by forwarding them to
// hash_combine_generic together with a default-constructed StdHasher.
// The function is noexcept exactly when that forwarded call is noexcept.
template <typename T, typename... Ts>
size_t hash_combine(const T& t, const Ts&... ts) noexcept(
noexcept(hash_combine_generic(StdHasher{}, t, ts...))) {
return hash_combine_generic(StdHasher{}, t, ts...);
}
// Convenience overload of commutative_hash_combine_generic: hashes each
// argument with a default-constructed Hash, starting from a seed of zero.
template <typename... Value>
uint64_t commutative_hash_combine(Value const&... value) {
  uint64_t const initialSeed = 0;
  return commutative_hash_combine_generic(initialSeed, Hash{}, value...);
}
} // namespace hash
// recursion
......
......@@ -18,6 +18,7 @@
#include <stdint.h>
#include <random>
#include <unordered_map>
#include <unordered_set>
#include <utility>
......@@ -547,6 +548,36 @@ TEST(Hash, hash_range) {
EXPECT_EQ(hash_vector<int>({}), hash_vector<float>({}));
}
// Exercises the commutative_hash_combine_* family: checks symmetry of the
// single-value combiner, permutation invariance of the range form, sensitivity
// to the seed and to the set of elements, and agreement between the variadic
// and range forms.
TEST(Hash, commutative_hash_combine) {
// Single-value combine is symmetric: seeding with hash(a) and folding in b
// must equal seeding with hash(b) and folding in a.
EXPECT_EQ(
commutative_hash_combine_value_generic(
folly::Hash{}(12345ul), folly::Hash{}, 6789ul),
commutative_hash_combine_value_generic(
folly::Hash{}(6789ul), folly::Hash{}, 12345ul));
std::vector<int> v = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
std::random_device rd;
std::mt19937 g(rd());
// Reference hash, taken before any shuffling.
auto h = commutative_hash_combine_range(v.begin(), v.end());
// The range hash must be the same for every permutation of the elements.
for (int i = 0; i < 100; i++) {
std::shuffle(v.begin(), v.end(), g);
EXPECT_EQ(h, commutative_hash_combine_range(v.begin(), v.end()));
}
// Same elements with a non-zero seed are expected to hash differently.
EXPECT_NE(
h,
commutative_hash_combine_range_generic(
/* seed = */ 0xdeadbeef, folly::Hash{}, v.begin(), v.end()));
// Leaving out one element (whichever ended up last after the shuffles) is
// expected to change the hash.
EXPECT_NE(
h, commutative_hash_combine_range(v.begin(), v.begin() + (v.size() - 1)));
// The variadic form must agree with the range form for the same values,
// regardless of argument order.
EXPECT_EQ(h, commutative_hash_combine(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
EXPECT_EQ(h, commutative_hash_combine(10, 2, 3, 4, 5, 6, 7, 8, 9, 1));
EXPECT_EQ(
commutative_hash_combine(12345, 6789),
commutative_hash_combine(6789, 12345));
}
TEST(Hash, std_tuple_different_hash) {
typedef std::tuple<int64_t, std::string, int32_t> tuple3;
tuple3 t1(42, "foo", 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment