Commit 5ca9b9dc authored by Nathan Bronson, committed by Facebook Github Bot

add IsAvalanchingHasher trait

Summary:
This diff adds folly::IsAvalanchingHasher<H, K>, which identifies
hash functors that exceed the standard's quality requirement by also
being avalanching.  This is useful for code that wants to map hash values
onto a restricted range or compute a secondary hash value without doing
extra work.

Reviewed By: yfeldblum

Differential Revision: D7180217

fbshipit-source-id: 8c402937d0a654c0ec32c62666e9dc4e0943f769
parent 73c483aa
...@@ -1360,6 +1360,12 @@ struct hasher< ...@@ -1360,6 +1360,12 @@ struct hasher<
} }
}; };
// Forward declaration of the trait; it takes a hasher type and a key type.
template <typename Hasher, typename Key>
struct IsAvalanchingHasher;

// The hasher for any folly::Range over raw pointers is marked as
// avalanching, whatever the key type is.
template <typename Elem, typename Enable, typename Key>
struct IsAvalanchingHasher<hasher<folly::Range<Elem*>, Enable>, Key>
    : std::true_type {};
/** /**
* _sp is a user-defined literal suffix to make an appropriate Range * _sp is a user-defined literal suffix to make an appropriate Range
* specialization from a literal string. * specialization from a literal string.
......
...@@ -388,6 +388,10 @@ struct integral_hasher { ...@@ -388,6 +388,10 @@ struct integral_hasher {
} }
}; };
// Compile-time flag consulted by IsAvalanchingHasher for integral and enum
// keys.  It is true when I is at least 8 bytes wide or when size_t is 4
// bytes wide, and false otherwise.
template <typename I>
using integral_hasher_avalanches = std::integral_constant<
    bool,
    !(sizeof(I) < 8 && sizeof(size_t) != 4)>;
template <typename F> template <typename F>
struct float_hasher { struct float_hasher {
size_t operator()(F const& f) const { size_t operator()(F const& f) const {
...@@ -409,6 +413,10 @@ struct float_hasher { ...@@ -409,6 +413,10 @@ struct float_hasher {
} }
}; };
// Compile-time flag consulted by IsAvalanchingHasher for floating point
// keys.  It is true when F is exactly 8 bytes wide or when size_t is 4
// bytes wide, and false otherwise.
template <typename F>
using float_hasher_avalanches = std::integral_constant<
    bool,
    !(sizeof(F) != 8 && sizeof(size_t) != 4)>;
} // namespace detail } // namespace detail
template <class Key, class Enable = void> template <class Key, class Enable = void>
...@@ -426,6 +434,47 @@ struct Hash { ...@@ -426,6 +434,47 @@ struct Hash {
} }
}; };
// IsAvalanchingHasher<H, K> extends std::integral_constant<bool, V>.
// V will be true if it is known that when a hasher of type H computes
// the hash of a key of type K, any subset of B bits from the resulting
// hash value is usable in a context that can tolerate a collision rate
// of about 1/2^B. (Input bits lost implicitly converting between K and
// the argument of H::operator() are not considered here; K is separate
// to handle the case of generic hashers like folly::Hash).
//
// The standard's definition of hash quality is based on the chance of hash
// collisions when using the entire hash value. No requirement is made that
// this property holds for subsets of the bits. In addition, hashed keys
// in real-world workloads are not chosen uniformly from the entire domain
// of keys, which can further increase the collision rate for a subset
// of bits. For example, std::hash<uint64_t> in libstdc++-v3 and libc++
// is the identity function. This hash function has no collisions when
// considering hash values in their entirety, but for real-world workloads
// the high bits are likely to always be zero.
//
// Some hash functions provide a stronger guarantee -- the standard's
// collision property is also preserved for subsets of the output bits and
// for sub-domains of keys. Another way to say this is that each bit of
// the hash value contains entropy from the entire input: changes to the
// input avalanche across all of the bits of the output. The distinction
// is useful when mapping the hash value onto a smaller space efficiently
// (such as when implementing a hash table).
// Primary template: a (Hasher, Key) combination is reported as NOT
// avalanching unless one of the specializations says otherwise.
template <typename Hasher, typename Key>
struct IsAvalanchingHasher : std::false_type {};
// Generic case for hasher<T, E>.  The base class is selected from T alone
// (K is not consulted):
//   - enum or integral T    -> detail::integral_hasher_avalanches<T>
//   - floating point T      -> detail::float_hasher_avalanches<T>
//   - anything else         -> std::false_type
template <typename T, typename E, typename K>
struct IsAvalanchingHasher<hasher<T, E>, K>
: std::conditional<
std::is_enum<T>::value || std::is_integral<T>::value,
detail::integral_hasher_avalanches<T>,
typename std::conditional<
std::is_floating_point<T>::value,
detail::float_hasher_avalanches<T>,
std::false_type>::type>::type {};
// For the generic Hash functor the answer is taken from the trait of
// hasher<K>, i.e. it is decided by the key type K.
template <typename K>
struct IsAvalanchingHasher<Hash, K> : IsAvalanchingHasher<hasher<K>, K> {};
template <> template <>
struct hasher<bool> { struct hasher<bool> {
size_t operator()(bool key) const { size_t operator()(bool key) const {
...@@ -433,6 +482,8 @@ struct hasher<bool> { ...@@ -433,6 +482,8 @@ struct hasher<bool> {
return key ? std::numeric_limits<size_t>::max() : 0; return key ? std::numeric_limits<size_t>::max() : 0;
} }
}; };
// hasher<bool> yields either all-ones or zero, and is marked as avalanching
// for every key type.
template <typename Key>
struct IsAvalanchingHasher<hasher<bool>, Key> : std::true_type {};
template <> template <>
struct hasher<unsigned long long> struct hasher<unsigned long long>
...@@ -490,20 +541,24 @@ struct hasher<std::string> { ...@@ -490,20 +541,24 @@ struct hasher<std::string> {
hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); hash::SpookyHashV2::Hash64(key.data(), key.size(), 0));
} }
}; };
// hasher<std::string> (built on SpookyHashV2::Hash64) is marked as
// avalanching for every key type.
template <typename Key>
struct IsAvalanchingHasher<hasher<std::string>, Key> : std::true_type {};
template <class T> template <typename T>
struct hasher<T, typename std::enable_if<std::is_enum<T>::value, void>::type> { struct hasher<T, typename std::enable_if<std::is_enum<T>::value, void>::type> {
size_t operator()(T key) const { size_t operator()(T key) const {
return Hash()(static_cast<typename std::underlying_type<T>::type>(key)); return Hash()(static_cast<typename std::underlying_type<T>::type>(key));
} }
}; };
template <class T1, class T2> template <typename T1, typename T2>
struct hasher<std::pair<T1, T2>> { struct hasher<std::pair<T1, T2>> {
size_t operator()(const std::pair<T1, T2>& key) const { size_t operator()(const std::pair<T1, T2>& key) const {
return Hash()(key.first, key.second); return Hash()(key.first, key.second);
} }
}; };
// hasher<std::pair<...>> forwards both members to the multi-argument
// Hash() call and is marked as avalanching for every key type.
template <typename First, typename Second, typename Key>
struct IsAvalanchingHasher<hasher<std::pair<First, Second>>, Key>
    : std::true_type {};
template <typename... Ts> template <typename... Ts>
struct hasher<std::tuple<Ts...>> { struct hasher<std::tuple<Ts...>> {
...@@ -512,6 +567,14 @@ struct hasher<std::tuple<Ts...>> { ...@@ -512,6 +567,14 @@ struct hasher<std::tuple<Ts...>> {
} }
}; };
// A one-element tuple gets the same answer as the hasher of its element.
template <typename Elem, typename Key>
struct IsAvalanchingHasher<hasher<std::tuple<Elem>>, Key>
    : IsAvalanchingHasher<hasher<Elem>, Key> {};

// With two or more elements the combiner for the multi-arg tuple also
// mixes bits, so the result is always marked as avalanching.
template <typename First, typename Second, typename... Rest, typename Key>
struct IsAvalanchingHasher<hasher<std::tuple<First, Second, Rest...>>, Key>
    : std::true_type {};
// recursion // recursion
template <size_t index, typename... Ts> template <size_t index, typename... Ts>
struct TupleHasher { struct TupleHasher {
...@@ -566,4 +629,34 @@ struct hash<std::tuple<Ts...>> { ...@@ -566,4 +629,34 @@ struct hash<std::tuple<Ts...>> {
return hasher(key); return hasher(key);
} }
}; };
} // namespace std } // namespace std
namespace folly {

// The std::hash<std::pair> and std::hash<std::tuple> cases handled below
// are the std::hash specializations that this file itself defines.

// std::hash of a pair: always marked as avalanching.
template <typename First, typename Second, typename Key>
struct IsAvalanchingHasher<std::hash<std::pair<First, Second>>, Key>
    : std::true_type {};

// std::hash of a one-element tuple: same answer as std::hash of the
// element, evaluated with the element as the key type.
template <typename Elem, typename Key>
struct IsAvalanchingHasher<std::hash<std::tuple<Elem>>, Key>
    : IsAvalanchingHasher<std::hash<Elem>, Elem> {};

// std::hash of a tuple with two or more elements: always marked as
// avalanching.
template <typename First, typename Second, typename... Rest, typename Key>
struct IsAvalanchingHasher<std::hash<std::tuple<First, Second, Rest...>>, Key>
    : std::true_type {};

// Standard library survey behind the string specialization:
//   - std::hash<std::string> avalanches on libstdc++-v3 (code checked),
//     libc++ (code checked), and MSVC (based on online information).
//   - std::hash for float and double avalanches on libstdc++-v3 but not
//     on libc++.
//   - std::hash for integral types avalanches on neither libstdc++-v3 nor
//     libc++.
// To stay conservative only std::string is marked as avalanching.
// std::string_view will qualify as well, once it exists.
template <typename... StringArgs, typename Key>
struct IsAvalanchingHasher<std::hash<std::basic_string<StringArgs...>>, Key>
    : std::true_type {};

} // namespace folly
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <utility> #include <utility>
#include <folly/MapUtil.h> #include <folly/MapUtil.h>
#include <folly/Range.h>
#include <folly/portability/GTest.h> #include <folly/portability/GTest.h>
using namespace folly::hash; using namespace folly::hash;
...@@ -480,3 +481,261 @@ INSTANTIATE_TEST_CASE_P( ...@@ -480,3 +481,261 @@ INSTANTIATE_TEST_CASE_P(
0xd9b957fb7fe794c5}, 0xd9b957fb7fe794c5},
(FNVTestParam){"http://norvig.com/21-days.html", // 136 (FNVTestParam){"http://norvig.com/21-days.html", // 136
0x07aaa640476e0b9a})); 0x07aaa640476e0b9a}));
namespace {

// Scoped enum with the default underlying type; enumerators are 0, 1, 2.
enum class TestEnum {
  MIN = 0,
  ITEM,
  MAX,
};

// Scoped enum whose underlying type is explicitly 64 bits wide.
enum class TestBigEnum : uint64_t {
  ITEM = 1,
};

// Empty class type used as a hash key in the checks below.
struct TestStruct {};

} // namespace
namespace std {

// std::hash<TestEnum> simply inherits everything from std::hash<int>.
template <>
struct hash<TestEnum> : hash<int> {};

// std::hash<TestStruct> maps every value to the same hash, zero.
template <>
struct hash<TestStruct> {
  std::size_t operator()(TestStruct const& /* ignored */) const {
    return std::size_t{0};
  }
};

} // namespace std
//////// static checks
// std::hash for scalars, pointers, and user-provided specializations falls
// through to the primary IsAvalanchingHasher template and must therefore
// report false.  Only std::hash<std::string> has a dedicated specialization
// that reports true.
static_assert(!folly::IsAvalanchingHasher<std::hash<int>, int>::value, "");
static_assert(
!folly::IsAvalanchingHasher<std::hash<char const*>, char const*>::value,
"");
static_assert(!folly::IsAvalanchingHasher<std::hash<float>, float>::value, "");
static_assert(
!folly::IsAvalanchingHasher<std::hash<double>, double>::value,
"");
static_assert(
!folly::IsAvalanchingHasher<std::hash<long double>, long double>::value,
"");
static_assert(
folly::IsAvalanchingHasher<std::hash<std::string>, std::string>::value,
"");
static_assert(
!folly::IsAvalanchingHasher<std::hash<TestEnum>, TestEnum>::value,
"");
static_assert(
!folly::IsAvalanchingHasher<std::hash<TestStruct>, TestStruct>::value,
"");
// these come from folly/hash/Hash.h
// (std::hash of a pair is always avalanching; a one-element tuple takes
// the answer of std::hash of its element; a tuple of two or more elements
// is always avalanching)
static_assert(
folly::IsAvalanchingHasher<
std::hash<std::pair<int, int>>,
std::pair<int, int>>::value,
"");
static_assert(
!folly::IsAvalanchingHasher<std::hash<std::tuple<int>>, std::tuple<int>>::
value,
"");
static_assert(
folly::IsAvalanchingHasher<
std::hash<std::tuple<std::string>>,
std::tuple<std::string>>::value,
"");
static_assert(
folly::IsAvalanchingHasher<
std::hash<std::tuple<int, int>>,
std::tuple<int, int>>::value,
"");
static_assert(
folly::IsAvalanchingHasher<
std::hash<std::tuple<int, int, int>>,
std::tuple<int, int, int>>::value,
"");
static_assert(!folly::IsAvalanchingHasher<folly::Hash, uint8_t>::value, "");
static_assert(!folly::IsAvalanchingHasher<folly::Hash, char>::value, "");
static_assert(!folly::IsAvalanchingHasher<folly::Hash, uint16_t>::value, "");
static_assert(!folly::IsAvalanchingHasher<folly::Hash, int16_t>::value, "");
static_assert(!folly::IsAvalanchingHasher<folly::Hash, uint32_t>::value, "");
static_assert(!folly::IsAvalanchingHasher<folly::Hash, int32_t>::value, "");
static_assert(folly::IsAvalanchingHasher<folly::Hash, uint64_t>::value, "");
static_assert(folly::IsAvalanchingHasher<folly::Hash, int64_t>::value, "");
static_assert(
folly::IsAvalanchingHasher<folly::Hash, folly::StringPiece>::value,
"");
static_assert(folly::IsAvalanchingHasher<folly::Hash, std::string>::value, "");
static_assert(!folly::IsAvalanchingHasher<folly::Hash, TestEnum>::value, "");
static_assert(folly::IsAvalanchingHasher<folly::Hash, TestBigEnum>::value, "");
// folly::hasher<T> for scalar T.  Integral types use
// detail::integral_hasher_avalanches (true when T is at least 8 bytes wide
// or size_t is 4 bytes wide) and floating point types use
// detail::float_hasher_avalanches (true when T is exactly 8 bytes wide or
// size_t is 4 bytes wide).  Types narrower than 8 bytes are compared
// against the size_t width rather than asserted false, so that these
// checks also hold on targets with a 32-bit size_t.
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<uint8_t>, uint8_t>::value ==
        (sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<char>, char>::value ==
        (sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<uint16_t>, uint16_t>::value ==
        (sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<int16_t>, int16_t>::value ==
        (sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<uint32_t>, uint32_t>::value ==
        (sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<int32_t>, int32_t>::value ==
        (sizeof(std::size_t) == 4),
    "");
// 8-byte integers are avalanching regardless of the size_t width.
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<uint64_t>, uint64_t>::value,
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<int64_t>, int64_t>::value,
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<float>, float>::value ==
        (sizeof(float) == 8 || sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<double>, double>::value ==
        (sizeof(double) == 8 || sizeof(std::size_t) == 4),
    "");
static_assert(
folly::IsAvalanchingHasher<folly::hasher<std::string>, std::string>::value,
"");
static_assert(
folly::IsAvalanchingHasher<folly::hasher<folly::StringPiece>, std::string>::
value,
"");
static_assert(
folly::IsAvalanchingHasher<folly::hasher<std::string>, std::string>::value,
"");
// folly::hasher for pairs, tuples, and enums.  Pairs and tuples of two or
// more elements are always avalanching.  A one-element tuple takes the
// answer of the hasher of its element, and enums follow
// detail::integral_hasher_avalanches (true when the type is at least 8
// bytes wide or size_t is 4 bytes wide).  Cases that depend on a type
// narrower than 8 bytes are compared against the size_t width rather than
// asserted false, so that these checks also hold on targets with a 32-bit
// size_t.
static_assert(
    folly::IsAvalanchingHasher<
        folly::hasher<std::pair<int, int>>,
        std::pair<int, int>>::value,
    "");
static_assert(
    folly::IsAvalanchingHasher<
        folly::hasher<std::tuple<int>>,
        std::tuple<int>>::value == (sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<
        folly::hasher<std::tuple<std::string>>,
        std::tuple<std::string>>::value,
    "");
static_assert(
    folly::IsAvalanchingHasher<
        folly::hasher<std::tuple<int, int>>,
        std::tuple<int, int>>::value,
    "");
static_assert(
    folly::IsAvalanchingHasher<
        folly::hasher<std::tuple<int, int, int>>,
        std::tuple<int, int, int>>::value,
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<TestEnum>, TestEnum>::value ==
        (sizeof(std::size_t) == 4),
    "");
static_assert(
    folly::IsAvalanchingHasher<folly::hasher<TestBigEnum>, TestBigEnum>::value,
    "");
//////// dynamic checks
namespace {
// Drives hasher H over a chain of dependent inputs -- initialValue, then
// whatever advance() turns it into, step after step -- and records, for
// every ordered pair of output bits (on, off), whether some single step
// turned bit `on` from 0 to 1 while turning bit `off` from 1 to 0.
//
// No probabilities are measured; this only verifies that bits change
// independently of one another, in both directions.  A random input
// sequence would NOT be a sufficient check, because even the identity
// function would pass it.  With a dependent sequence an identity hash
// would need 2^63 steps to finish, since no transition that turns bit 63
// on occurs before then.
//
// The loop stops once at most 1/95 of the N * (N - 1) ordered pairs are
// still unseen.  Reaching 1000 steps before that point trips ASSERT_LT.
template <typename H, typename T, typename F>
void verifyAvalanching(T initialValue, F const& advance) {
  H const hasher;
  constexpr std::size_t N = sizeof(decltype(hasher(initialValue))) * 8;
  constexpr std::size_t kPairCount = N * (N - 1);
  constexpr std::size_t kAllowedUnseen = kPairCount / 95;

  // seen[on][off]: bit `on` was observed switching on in the same step in
  // which bit `off` switched off
  bool seen[N][N] = {};
  std::size_t unseenCount = kPairCount;

  auto v = initialValue;
  auto h = hasher(v);

  for (std::size_t steps = 1; unseenCount > kAllowedUnseen; ++steps) {
    auto const hPrev = h;
    advance(v);
    h = hasher(v);
    uint64_t const delta = hPrev ^ h;

    for (std::size_t lo = 0; lo + 1 < N; ++lo) {
      bool const loFlipped = ((delta >> lo) & 1) != 0;
      if (!loFlipped) {
        continue;
      }
      // a flipped bit that was 0 before the step has just switched on
      bool const loTurnedOn = ((hPrev >> lo) & 1) == 0;

      for (std::size_t hi = lo + 1; hi < N; ++hi) {
        bool const hiFlipped = ((delta >> hi) & 1) != 0;
        bool const hiTurnedOn = ((hPrev >> hi) & 1) == 0;
        // only pairs that flipped in opposite directions are of interest
        if (!hiFlipped || hiTurnedOn == loTurnedOn) {
          continue;
        }
        bool& cell = loTurnedOn ? seen[lo][hi] : seen[hi][lo];
        if (!cell) {
          cell = true;
          --unseenCount;
        }
      }
    }

    // we should actually only need a couple hundred
    ASSERT_LT(steps, 1000) << unseenCount << " of " << kPairCount
                           << " pair transitions unseen";
  }
}
} // namespace
// std::hash<std::pair<int, int>>: walk the first member upwards from (0, 0).
TEST(Traits, stdHashPairAvalances) {
  using Key = std::pair<int, int>;
  auto const bumpFirst = [](Key& key) { ++key.first; };
  verifyAvalanching<std::hash<Key>>(std::make_pair(0, 0), bumpFirst);
}
// std::hash<std::tuple<int, int>>: walk element 0 upwards from (0, 0).
TEST(Traits, stdHashTuple2Avalances) {
  using Key = std::tuple<int, int>;
  auto const bumpFirst = [](Key& key) { ++std::get<0>(key); };
  verifyAvalanching<std::hash<Key>>(std::make_tuple(0, 0), bumpFirst);
}
// std::hash<std::string>: treat the string as a binary counter made of the
// characters '0' and '1', least significant digit first, and count upwards.
TEST(Traits, stdHashStringAvalances) {
  auto const increment = [](std::string& digits) {
    std::size_t pos = 0;
    // carry: every leading '1' rolls over to '0'
    for (; digits[pos] == '1'; ++pos) {
      digits[pos] = '0';
    }
    digits[pos] = '1';
  };
  verifyAvalanching<std::hash<std::string>, std::string>(
      "00000000000000000000000000000", increment);
}
// folly::Hash on uint64_t: count upwards from zero.
TEST(Traits, follyHashUint64Avalances) {
  auto const increment = [](uint64_t& counter) { ++counter; };
  verifyAvalanching<folly::Hash>(uint64_t{0}, increment);
}
// folly::hasher<int64_t>: count upwards from zero.
TEST(Traits, follyHasherInt64Avalances) {
  auto const increment = [](int64_t& counter) { ++counter; };
  verifyAvalanching<folly::hasher<int64_t>>(int64_t{0}, increment);
}
// folly::hasher<double>: start at 0.0 and add one per step.
TEST(Traits, follyHasherDoubleAvalanches) {
  auto const addOne = [](double& value) { value += 1; };
  verifyAvalanching<folly::hasher<double>>(0.0, addOne);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment