Commit 05975ce4, authored by Nathan Bronson, committed by Facebook Github Bot

heterogeneous string keys by default in F14, with general mechanism

Summary:
This diff adds folly types HeterogeneousAccessLess<T>,
HeterogeneousAccessEqualTo<T>, and HeterogeneousAccessHash<T> that
are suitable as the default functor types for containers that support
heterogeneous access, and it makes those the default for F14 maps and
sets.  The new functor types will be marked as transparent when possible,
otherwise they will fall back to the non-transparent std::less<T>,
std::equal_to<T>, and std::hash<T> respectively.

Heterogeneous lookup is provided for types that are implicitly convertible
to Range<T const*>, where T is an integral type.  This includes
std::string, std::string_view (when available), folly::StringPiece,
folly::MutableStringPiece, std::array, std::vector, folly::small_vector.

HeterogeneousAccessHash<T> will use folly::hasher<folly::Range<
T::value_type const*>> for hashing when it provides a transparent hash,
except that for libstdc++ and libc++ we special-case the hashing of
Range<char const*> to use the same hash function as std::string.
That makes this diff performance neutral for existing use cases.

Reviewed By: ot

Differential Revision: D8768761

fbshipit-source-id: ded7c86e30479409cf838ac8219241622a4e2332
parent 4ac6c276
......@@ -599,7 +599,6 @@ class Range {
return const_range_type(*this);
}
// Works only for Range<const char*> and Range<char*>
int compare(const const_range_type& o) const {
const size_type tsize = this->size();
const size_type osize = o.size();
......@@ -1491,10 +1490,16 @@ struct hasher;
// Hasher specialization for folly::Range<T*>. It passes the range's
// underlying bytes (r.size() * sizeof(T) bytes starting at r.begin())
// to SpookyHashV2::Hash64 with seed 0, and declares itself avalanching.
// NOTE(review): both a std::is_pod and a std::is_integral constraint line
// appear below; presumably these are the removed and the added line of the
// diff hunk, and only the std::is_integral one is in effect -- confirm.
template <class T>
struct hasher<
folly::Range<T*>,
typename std::enable_if<std::is_pod<T>::value, void>::type> {
std::enable_if_t<std::is_integral<T>::value, void>> {
using folly_is_avalanching = std::true_type;
size_t operator()(folly::Range<T*> r) const {
// std::is_integral<T> is too restrictive, but is sufficient to
// guarantee we can just hash all of the underlying bytes to get a
// suitable hash of T. Something like absl::is_uniquely_represented<T>
// would be better. std::is_pod is not enough, because POD types
// can contain pointers and padding. Also, floating point numbers
// may be == without being bit-identical.
return hash::SpookyHashV2::Hash64(r.begin(), r.size() * sizeof(T), 0);
}
};
......
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
// Forward declarations only: nothing in this namespace block is defined
// here, so including it does not pull in any implementation.
namespace folly {
// Equality functor for key type T. Enable defaults to void, leaving a
// second parameter that partial specializations can constrain.
template <typename T, typename Enable = void>
struct HeterogeneousAccessEqualTo;
// Hash functor for key type T, with the same Enable = void default.
template <typename T, typename Enable = void>
struct HeterogeneousAccessHash;
// NOTE(review): the two templates below are only declared here; no
// definition is visible next to this header -- confirm they are needed.
template <typename CharT>
struct TransparentStringEqualTo;
template <typename CharT>
struct TransparentStringHash;
} // namespace folly
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <functional>
#include <string>
#include <folly/Range.h>
#include <folly/Traits.h>
#include <folly/container/HeterogeneousAccess-pre.h>
#include <folly/hash/Hash.h>
namespace folly {
// folly::HeterogeneousAccessEqualTo<T>, and
// folly::HeterogeneousAccessHash<T> are functors suitable as defaults
// for containers that support heterogeneous access. When possible, they
// will be marked as transparent. When no transparent implementation
// is available then they fall back to std::equal_to and std::hash
// respectively. Since the fallbacks are not marked as transparent,
// heterogeneous lookup won't be available in that case. A corresponding
// HeterogeneousAccessLess<T> could be easily added if desired.
//
// If T can be implicitly converted to a StringPiece or
// to a Range<T::value_type const*> that is hashable, then
// HeterogeneousAccess{EqualTo,Hash}<T> will be transparent without any
// additional work. In practice this is true for T that can be converted to
// StringPiece or Range<IntegralType const*>. This includes std::string,
// std::string_view (when available), std::array, folly::Range,
// std::vector, and folly::small_vector.
//
// Additional specializations of HeterogeneousAccess*<T> should go in
// the header that declares T. Don't forget to typedef is_transparent to
// void and folly_is_avalanching to std::true_type in the specializations.
// Primary template, used when no specialization applies: it is exactly
// std::equal_to<T> and does not declare is_transparent itself.
template <typename T, typename Enable>
struct HeterogeneousAccessEqualTo : std::equal_to<T> {};
// Primary template, used when no specialization applies: hashes with
// std::hash<T>. folly_is_avalanching is not hard-coded; it is whatever
// IsAvalanchingHasher reports for std::hash<T> applied to T.
template <typename T, typename Enable>
struct HeterogeneousAccessHash : std::hash<T> {
using folly_is_avalanching = IsAvalanchingHasher<std::hash<T>, T>;
};
//////// strings
namespace detail {
// Trait naming the element type E used for the Range<E const*> that T is
// tested against (and converted to) for transparent access.
// The primary template yields char.
template <typename T, typename Enable = void>
struct ValueTypeForTransparentConversionToRange {
using type = char;
};
// We assume that folly::hasher<folly::Range<T const*>> won't be enabled
// when it would be lower quality than std::hash<U> for a U that is
// convertible to folly::Range<T const*>.
//
// This specialization is selected (through void_t) only when T has a
// nested value_type and hasher<Range<value_type const*>> can be called
// with such a Range. It yields value_type with any const removed.
template <typename T>
struct ValueTypeForTransparentConversionToRange<
T,
void_t<decltype(
std::declval<hasher<Range<typename T::value_type const*>>>()(
std::declval<Range<typename T::value_type const*>>()))>> {
using type = std::remove_const_t<typename T::value_type>;
};
// std::is_convertible from T to a Range over const elements, where the
// element type is the one ValueTypeForTransparentConversionToRange<T>
// selects. Its ::value gates the transparent specializations.
template <typename T>
using TransparentlyConvertibleToRange = std::is_convertible<
T,
Range<typename ValueTypeForTransparentConversionToRange<T>::type const*>>;
template <typename T>
struct TransparentRangeEqualTo {
using is_transparent = void;
template <typename U1, typename U2>
bool operator()(U1 const& lhs, U2 const& rhs) const {
return Range<T const*>{lhs} == Range<T const*>{rhs};
}
// This overload is not required for functionality, but
// guarantees that replacing std::equal_to<std::string> with
// HeterogeneousAccessEqualTo<std::string> is truly zero overhead
bool operator()(std::string const& lhs, std::string const& rhs) const {
return lhs == rhs;
}
};
template <typename T>
struct TransparentRangeHash {
using is_transparent = void;
using folly_is_avalanching = std::true_type;
private:
template <typename U>
static std::size_t hashImpl(Range<U const*> piece) {
return hasher<Range<U const*>>{}(piece);
}
static std::size_t hashImpl(StringPiece piece) {
#if defined(_GLIBCXX_STRING)
return std::_Hash_impl::hash(piece.begin(), piece.size());
#elif defined(_LIBCPP_STRING)
return std::__do_string_hash(piece.begin(), piece.end());
#else
return hasher<StringPiece>{}(piece);
#endif
}
public:
template <typename U>
std::size_t operator()(U const& stringish) const {
return hashImpl(Range<T const*>{stringish});
}
// Neither this overload nor the platform-conditional compilation
// is required for functionality, but implementing it this
// way guarantees that replacing std::hash<std::string> with
// HeterogeneousAccessHash<std::string> is actually zero overhead
// in the case that the underlying implementations make different
// optimality tradeoffs (short versus long string performance, for
// example). If folly::hasher<StringPiece> dominated the performance
// of std::hash<std::string> then we should consider using it all of
// the time.
std::size_t operator()(std::string const& str) const {
#if defined(_GLIBCXX_STRING) || defined(_LIBCPP_STRING)
return std::hash<std::string>{}(str);
#else
return hasher<StringPiece>{}(str);
#endif
}
};
} // namespace detail
// Transparent specialization: enabled when T passes
// TransparentlyConvertibleToRange, in which case the functor is a
// TransparentRangeEqualTo over the element type chosen for T.
template <typename T>
struct HeterogeneousAccessEqualTo<
T,
std::enable_if_t<detail::TransparentlyConvertibleToRange<T>::value>>
: detail::TransparentRangeEqualTo<
typename detail::ValueTypeForTransparentConversionToRange<T>::type> {
};
// Transparent specialization: enabled when T passes
// TransparentlyConvertibleToRange, in which case the functor is a
// TransparentRangeHash over the element type chosen for T.
template <typename T>
struct HeterogeneousAccessHash<
T,
std::enable_if_t<detail::TransparentlyConvertibleToRange<T>::value>>
: detail::TransparentRangeHash<
typename detail::ValueTypeForTransparentConversionToRange<T>::type> {
};
} // namespace folly
......@@ -16,16 +16,17 @@
#pragma once
#include <functional>
#include <memory>
#include <folly/container/HeterogeneousAccess-pre.h>
namespace folly {
namespace f14 {
// Default hasher, key-equality and allocator aliases in namespace f14.
// NOTE(review): DefaultHasher and DefaultKeyEqual each appear with two
// targets below; presumably the std:: line is the one removed by the diff
// and the HeterogeneousAccess* line is the one added -- confirm.
template <typename T>
using DefaultHasher = std::hash<T>;
using DefaultHasher = HeterogeneousAccessHash<T>;
template <typename T>
using DefaultKeyEqual = std::equal_to<T>;
using DefaultKeyEqual = HeterogeneousAccessEqualTo<T>;
template <typename T>
using DefaultAlloc = std::allocator<T>;
......
......@@ -23,6 +23,7 @@
#include <folly/Memory.h>
#include <folly/Portability.h>
#include <folly/Unit.h>
#include <folly/container/HeterogeneousAccess.h>
#include <folly/container/detail/F14Table.h>
#include <folly/hash/Hash.h>
#include <folly/lang/Align.h>
......
......@@ -44,6 +44,7 @@
#include <folly/lang/SafeAssert.h>
#include <folly/portability/Builtins.h>
#include <folly/container/HeterogeneousAccess.h>
#include <folly/container/detail/F14Defaults.h>
#include <folly/container/detail/F14IntrinsicsAvailability.h>
......
......@@ -1455,6 +1455,7 @@ TEST(F14ValueMap, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14ValueMap<std::string, std::string>>();
}
TEST(F14NodeMap, heterogeneousInsert) {
......@@ -1468,6 +1469,7 @@ TEST(F14NodeMap, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14NodeMap<std::string, std::string>>();
}
TEST(F14VectorMap, heterogeneousInsert) {
......@@ -1481,6 +1483,7 @@ TEST(F14VectorMap, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14VectorMap<std::string, std::string>>();
}
TEST(F14FastMap, heterogeneousInsert) {
......@@ -1494,6 +1497,7 @@ TEST(F14FastMap, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14FastMap<std::string, std::string>>();
}
///////////////////////////////////
......
......@@ -1099,6 +1099,7 @@ TEST(F14ValueSet, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14ValueSet<std::string>>();
}
TEST(F14NodeSet, heterogeneousInsert) {
......@@ -1110,6 +1111,7 @@ TEST(F14NodeSet, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14NodeSet<std::string>>();
}
TEST(F14VectorSet, heterogeneousInsert) {
......@@ -1121,6 +1123,7 @@ TEST(F14VectorSet, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14VectorSet<std::string>>();
}
TEST(F14FastSet, heterogeneousInsert) {
......@@ -1132,6 +1135,7 @@ TEST(F14FastSet, heterogeneousInsert) {
std::string,
transparent<hasher<StringPiece>>,
transparent<DefaultKeyEqual<StringPiece>>>>();
runHeterogeneousInsertStringTest<F14FastSet<std::string>>();
}
namespace {
......
......@@ -279,7 +279,7 @@ struct Tracked {
template <int Tag>
struct TransparentTrackedHash {
using is_transparent = std::true_type;
using is_transparent = void;
size_t operator()(Tracked<Tag> const& tracked) const {
return tracked.val_ ^ Tag;
......@@ -291,7 +291,7 @@ struct TransparentTrackedHash {
template <int Tag>
struct TransparentTrackedEqual {
using is_transparent = std::true_type;
using is_transparent = void;
uint64_t unwrap(Tracked<Tag> const& v) const {
return v.val_;
......
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/container/HeterogeneousAccess.h>
#include <set>
#include <vector>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/Traits.h>
#include <folly/portability/GTest.h>
#include <folly/small_vector.h>
#if FOLLY_HAS_STRING_VIEW
#include <string_view> // @manual
#endif
using namespace folly;
namespace {
// Detects whether T declares a nested is_transparent type.
// Primary template: false.
template <typename T, typename Enable = void>
struct IsTransparent : std::false_type {};
// Selected through void_t only when T::is_transparent names a type: true.
template <typename T>
struct IsTransparent<T, void_t<typename T::is_transparent>> : std::true_type {};
template <typename T>
void checkTransparent() {
static_assert(IsTransparent<HeterogeneousAccessEqualTo<T>>::value, "");
static_assert(IsTransparent<HeterogeneousAccessHash<T>>::value, "");
}
template <typename T>
void checkNotTransparent() {
static_assert(!IsTransparent<HeterogeneousAccessEqualTo<T>>::value, "");
static_assert(!IsTransparent<HeterogeneousAccessHash<T>>::value, "");
}
// Test helper: wraps a vector of std::string and converts implicitly to
// Range<std::string const*>.
struct StringVector {
  std::vector<std::string> data_;

  // Builds the range from data() rather than &data_[0], so converting an
  // empty StringVector never indexes an empty vector (undefined behavior).
  /* implicit */ operator Range<std::string const*>() const {
    return {data_.data(), data_.size()};
  }
};
} // namespace
namespace std {
// std::hash for the StringVector test type. It delegates to
// folly::hash::hash_range over the wrapped strings.
template <>
struct hash<StringVector> {
  std::size_t operator()(StringVector const& value) const {
    auto const& strings = value.data_;
    return folly::hash::hash_range(strings.begin(), strings.end());
  }
};
} // namespace std
// Every type listed here must get transparent HeterogeneousAccessEqualTo
// and HeterogeneousAccessHash functors. The checks are static_asserts
// inside checkTransparent, so a failure shows up at compile time.
TEST(HeterogeneousAccess, transparentIsSelected) {
// standard string types
checkTransparent<std::string>();
checkTransparent<std::wstring>();
checkTransparent<std::u16string>();
checkTransparent<std::u32string>();
#if FOLLY_HAS_STRING_VIEW
// standard string views, only when the macro reports them available
checkTransparent<std::string_view>();
checkTransparent<std::wstring_view>();
checkTransparent<std::u16string_view>();
checkTransparent<std::u32string_view>();
#endif
// folly string pieces and ranges over const elements
checkTransparent<StringPiece>();
checkTransparent<MutableStringPiece>();
checkTransparent<Range<char const*>>();
checkTransparent<Range<wchar_t const*>>();
checkTransparent<Range<char16_t const*>>();
checkTransparent<Range<char32_t const*>>();
checkTransparent<Range<int const*>>();
// ranges over mutable elements
checkTransparent<Range<char*>>();
checkTransparent<Range<wchar_t*>>();
checkTransparent<Range<char16_t*>>();
checkTransparent<Range<char32_t*>>();
checkTransparent<Range<int*>>();
// vectors
checkTransparent<std::vector<char>>();
checkTransparent<std::vector<wchar_t>>();
checkTransparent<std::vector<char16_t>>();
checkTransparent<std::vector<char32_t>>();
checkTransparent<std::vector<int>>();
// arrays of const elements
checkTransparent<std::array<char const, 2>>();
checkTransparent<std::array<wchar_t const, 2>>();
checkTransparent<std::array<char16_t const, 2>>();
checkTransparent<std::array<char32_t const, 2>>();
checkTransparent<std::array<int const, 2>>();
// arrays of mutable elements
checkTransparent<std::array<char, 2>>();
checkTransparent<std::array<wchar_t, 2>>();
checkTransparent<std::array<char16_t, 2>>();
checkTransparent<std::array<char32_t, 2>>();
checkTransparent<std::array<int, 2>>();
}
// Every type listed here must fall back to the non-transparent functors.
// The checks are static_asserts inside checkNotTransparent.
TEST(HeterogeneousAccess, transparentIsNotSelected) {
// scalars
checkNotTransparent<char>();
checkNotTransparent<int>();
checkNotTransparent<float>();
checkNotTransparent<std::pair<StringPiece, StringPiece>>();
// StringVector does convert to Range<std::string const*>, but that range
// type has no folly::hasher, so the transparent path must stay disabled.
checkNotTransparent<StringVector>(); // no folly::hasher for Range
}
// Checks that HeterogeneousAccessEqualTo<L> and HeterogeneousAccessHash<L>
// treat a value of type L and a value of type R consistently when both
// are built from the same elements.
//   L, R - the left/right operand types, each constructed from a range
//   S    - the source container type, taken by value because it is
//          indexed through a mutable pointer range
// NOTE(review): src is assumed to be non-empty (size() - 1 and &src[0]
// are used unguarded); every visible caller passes four elements.
template <typename L, typename R, typename S>
void runTestMatches2(S src) {
// smaller is src with its last element dropped
S smaller{src};
smaller.resize(smaller.size() - 1);
using RangeType = Range<typename S::value_type*>;
// lhs1/rhs1 are built from the full src, lhs2/rhs2 from the shorter copy
L lhs1{RangeType{&src[0], src.size()}};
L lhs2{RangeType{&smaller[0], smaller.size()}};
R rhs1{RangeType{&src[0], src.size()}};
R rhs2{RangeType{&smaller[0], smaller.size()}};
HeterogeneousAccessEqualTo<L> equalTo;
HeterogeneousAccessHash<L> hash;
// equality must hold exactly when both sides come from the same source
EXPECT_TRUE(equalTo(lhs1, rhs1));
EXPECT_FALSE(equalTo(lhs1, rhs2));
EXPECT_FALSE(equalTo(lhs2, rhs1));
EXPECT_TRUE(equalTo(lhs2, rhs2));
// equal operands must hash equal; unequal ones are expected to differ
EXPECT_EQ(hash(lhs1), hash(rhs1));
EXPECT_NE(hash(lhs1), hash(rhs2)); // technically only low probability
EXPECT_NE(hash(lhs2), hash(rhs1)); // technically only low probability
EXPECT_EQ(hash(lhs2), hash(rhs2));
// a is a one-element std::array holding the first element
auto v0 = smaller[0];
std::array<decltype(v0), 1> a{{v0}};
EXPECT_FALSE(equalTo(a, lhs1));
EXPECT_FALSE(equalTo(a, rhs1));
// shrink smaller to that same single element so it should now match a
smaller.resize(1);
EXPECT_FALSE(equalTo(a, lhs1));
EXPECT_FALSE(equalTo(a, lhs2));
EXPECT_TRUE(equalTo(a, smaller));
EXPECT_EQ(hash(a), hash(smaller));
}
template <typename S>
void runTestMatches(S const& src) {
using SP = Range<typename S::value_type const*>;
using MSP = Range<typename S::value_type*>;
#if FOLLY_HAS_STRING_VIEW
using SV = std::basic_string_view<typename S::value_type>;
#else
using SV = SP;
#endif
using V = std::vector<typename S::value_type>;
runTestMatches2<S, S>(src);
runTestMatches2<S, SP>(src);
runTestMatches2<S, MSP>(src);
runTestMatches2<S, SV>(src);
runTestMatches2<S, V>(src);
runTestMatches2<SP, S>(src);
runTestMatches2<SP, SP>(src);
runTestMatches2<SP, MSP>(src);
runTestMatches2<SP, SV>(src);
runTestMatches2<SP, V>(src);
runTestMatches2<MSP, S>(src);
runTestMatches2<MSP, SP>(src);
runTestMatches2<MSP, MSP>(src);
runTestMatches2<MSP, SV>(src);
runTestMatches2<MSP, V>(src);
runTestMatches2<SV, S>(src);
runTestMatches2<SV, SP>(src);
runTestMatches2<SV, MSP>(src);
runTestMatches2<SV, SV>(src);
runTestMatches2<SV, V>(src);
runTestMatches2<V, S>(src);
runTestMatches2<V, SP>(src);
runTestMatches2<V, MSP>(src);
runTestMatches2<V, SV>(src);
runTestMatches2<V, V>(src);
}
// Returns sv through its implicit conversion to Range<int const*>.
// NOTE(review): no caller is visible here; presumably this exists only to
// force that conversion to compile -- confirm, and consider giving it
// internal linkage if so.
Range<int const*> foo(small_vector<int, 2> const& sv) {
return sv;
}
// Runs the full lhs/rhs pairing matrix of runTestMatches over several
// four-element sources: narrow, wide, UTF-16 and UTF-32 strings, a
// std::vector<int>, and a folly::small_vector<int, 2>.
TEST(HeterogeneousAccess, transparentMatches) {
runTestMatches<std::string>("abcd");
// NOTE(review): relies on a u8 literal being convertible to std::string.
runTestMatches<std::string>(u8"abcd");
runTestMatches<std::wstring>(L"abcd");
runTestMatches<std::u16string>(u"abcd");
runTestMatches<std::u32string>(U"abcd");
runTestMatches<std::vector<int>>({1, 2, 3, 4});
// small_vector must convert implicitly to a range over const ints before
// it is run through the same matrix.
static_assert(
std::is_convertible<small_vector<int, 2>, Range<int const*>>::value, "");
runTestMatches<small_vector<int, 2>>({1, 2, 3, 4});
}
......@@ -272,7 +272,8 @@ TEST(EmplaceIterator, HintEmplacerTest) {
v2.begin(),
v2.end(),
hint_emplacer(diff, diff.end()));
ASSERT_EQ(diff, std::set<O>({O(1), O(3)}));
std::set<O> expected = {O(1), O(3)};
ASSERT_EQ(diff, expected);
}
}
......
......@@ -427,6 +427,7 @@ class small_vector : public detail::small_vector_base<
typedef value_type* iterator;
typedef value_type* pointer;
typedef value_type const* const_iterator;
typedef value_type const* const_pointer;
typedef std::ptrdiff_t difference_type;
typedef std::reverse_iterator<iterator> reverse_iterator;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment