Commit 99de4c5f authored by Xiaofan Yang's avatar Xiaofan Yang Committed by facebook-github-bot-1

add config to allow using quadratic probing

Summary: In my use case (1.5 billion keys with loadFactor 0.8), linear probing performs really badly.

Reviewed By: nbronson

Differential Revision: D2579243

fb-gh-sync-id: 5081356de55f770823a4afad55bf7e2114b4e313
parent af3fa519
...@@ -25,8 +25,8 @@ namespace folly { ...@@ -25,8 +25,8 @@ namespace folly {
// AtomicHashArray private constructor -- // AtomicHashArray private constructor --
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>:: AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey, AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey,
KeyT erasedKey, double _maxLoadFactor, size_t cacheSize) KeyT erasedKey, double _maxLoadFactor, size_t cacheSize)
: capacity_(capacity), : capacity_(capacity),
...@@ -44,17 +44,17 @@ AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey, ...@@ -44,17 +44,17 @@ AtomicHashArray(size_t capacity, KeyT emptyKey, KeyT lockedKey,
* ret.index is set to capacity_. * ret.index is set to capacity_.
*/ */
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
typename AtomicHashArray<KeyT, ValueT, typename AtomicHashArray<KeyT, ValueT,
HashFcn, EqualFcn, Allocator>::SimpleRetT HashFcn, EqualFcn, Allocator, ProbeFcn>::SimpleRetT
AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>:: AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
findInternal(const KeyT key_in) { findInternal(const KeyT key_in) {
DCHECK_NE(key_in, kEmptyKey_); DCHECK_NE(key_in, kEmptyKey_);
DCHECK_NE(key_in, kLockedKey_); DCHECK_NE(key_in, kLockedKey_);
DCHECK_NE(key_in, kErasedKey_); DCHECK_NE(key_in, kErasedKey_);
for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0; for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0;
; ;
idx = probeNext(idx, numProbes)) { idx = ProbeFcn()(idx, numProbes, capacity_)) {
const KeyT key = acquireLoadKey(cells_[idx]); const KeyT key = acquireLoadKey(cells_[idx]);
if (LIKELY(EqualFcn()(key, key_in))) { if (LIKELY(EqualFcn()(key, key_in))) {
return SimpleRetT(idx, true); return SimpleRetT(idx, true);
...@@ -63,6 +63,8 @@ findInternal(const KeyT key_in) { ...@@ -63,6 +63,8 @@ findInternal(const KeyT key_in) {
// if we hit an empty element, this key does not exist // if we hit an empty element, this key does not exist
return SimpleRetT(capacity_, false); return SimpleRetT(capacity_, false);
} }
// NOTE: the way we count numProbes must be the same in find(), insert(),
// and erase(). Otherwise it may break probing.
++numProbes; ++numProbes;
if (UNLIKELY(numProbes >= capacity_)) { if (UNLIKELY(numProbes >= capacity_)) {
// probed every cell...fail // probed every cell...fail
...@@ -82,11 +84,11 @@ findInternal(const KeyT key_in) { ...@@ -82,11 +84,11 @@ findInternal(const KeyT key_in) {
* default. * default.
*/ */
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
template <typename... ArgTs> template <typename... ArgTs>
typename AtomicHashArray<KeyT, ValueT, typename AtomicHashArray<KeyT, ValueT,
HashFcn, EqualFcn, Allocator>::SimpleRetT HashFcn, EqualFcn, Allocator, ProbeFcn>::SimpleRetT
AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>:: AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) { insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) {
const short NO_NEW_INSERTS = 1; const short NO_NEW_INSERTS = 1;
const short NO_PENDING_INSERTS = 2; const short NO_PENDING_INSERTS = 2;
...@@ -174,13 +176,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) { ...@@ -174,13 +176,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) {
continue; continue;
} }
// NOTE: the way we count numProbes must be the same in find(),
// insert(), and erase(). Otherwise it may break probing.
++numProbes; ++numProbes;
if (UNLIKELY(numProbes >= capacity_)) { if (UNLIKELY(numProbes >= capacity_)) {
// probed every cell...fail // probed every cell...fail
return SimpleRetT(capacity_, false); return SimpleRetT(capacity_, false);
} }
idx = probeNext(idx, numProbes); idx = ProbeFcn()(idx, numProbes, capacity_);
} }
} }
...@@ -196,15 +201,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) { ...@@ -196,15 +201,16 @@ insertInternal(KeyT key_in, ArgTs&&... vCtorArgs) {
* touch it either. * touch it either.
*/ */
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
size_t AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>:: size_t AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
erase(KeyT key_in) { erase(KeyT key_in) {
CHECK_NE(key_in, kEmptyKey_); CHECK_NE(key_in, kEmptyKey_);
CHECK_NE(key_in, kLockedKey_); CHECK_NE(key_in, kLockedKey_);
CHECK_NE(key_in, kErasedKey_); CHECK_NE(key_in, kErasedKey_);
for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0; for (size_t idx = keyToAnchorIdx(key_in), numProbes = 0;
; ;
idx = probeNext(idx, numProbes)) { idx = ProbeFcn()(idx, numProbes, capacity_)) {
DCHECK_LT(idx, capacity_); DCHECK_LT(idx, capacity_);
value_type* cell = &cells_[idx]; value_type* cell = &cells_[idx];
KeyT currentKey = acquireLoadKey(*cell); KeyT currentKey = acquireLoadKey(*cell);
...@@ -231,6 +237,9 @@ erase(KeyT key_in) { ...@@ -231,6 +237,9 @@ erase(KeyT key_in) {
// If another thread succeeds in erasing our key, we'll stop our search. // If another thread succeeds in erasing our key, we'll stop our search.
return 0; return 0;
} }
// NOTE: the way we count numProbes must be the same in find(), insert(),
// and erase(). Otherwise it may break probing.
++numProbes; ++numProbes;
if (UNLIKELY(numProbes >= capacity_)) { if (UNLIKELY(numProbes >= capacity_)) {
// probed every cell...fail // probed every cell...fail
...@@ -240,10 +249,10 @@ erase(KeyT key_in) { ...@@ -240,10 +249,10 @@ erase(KeyT key_in) {
} }
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
typename AtomicHashArray<KeyT, ValueT, typename AtomicHashArray<KeyT, ValueT,
HashFcn, EqualFcn, Allocator>::SmartPtr HashFcn, EqualFcn, Allocator, ProbeFcn>::SmartPtr
AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>:: AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
create(size_t maxSize, const Config& c) { create(size_t maxSize, const Config& c) {
CHECK_LE(c.maxLoadFactor, 1.0); CHECK_LE(c.maxLoadFactor, 1.0);
CHECK_GT(c.maxLoadFactor, 0.0); CHECK_GT(c.maxLoadFactor, 0.0);
...@@ -282,8 +291,8 @@ create(size_t maxSize, const Config& c) { ...@@ -282,8 +291,8 @@ create(size_t maxSize, const Config& c) {
} }
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>:: void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
destroy(AtomicHashArray* p) { destroy(AtomicHashArray* p) {
assert(p); assert(p);
...@@ -301,8 +310,8 @@ destroy(AtomicHashArray* p) { ...@@ -301,8 +310,8 @@ destroy(AtomicHashArray* p) {
// clear -- clears all keys and values in the map and resets all counters // clear -- clears all keys and values in the map and resets all counters
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>:: void AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
clear() { clear() {
FOR_EACH_RANGE(i, 0, capacity_) { FOR_EACH_RANGE(i, 0, capacity_) {
if (cells_[i].first != kEmptyKey_) { if (cells_[i].first != kEmptyKey_) {
...@@ -321,9 +330,10 @@ clear() { ...@@ -321,9 +330,10 @@ clear() {
// Iterator implementation // Iterator implementation
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
template <class ContT, class IterVal> template <class ContT, class IterVal>
struct AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator>::aha_iterator struct AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>::
aha_iterator
: boost::iterator_facade<aha_iterator<ContT,IterVal>, : boost::iterator_facade<aha_iterator<ContT,IterVal>,
IterVal, IterVal,
boost::forward_traversal_tag> boost::forward_traversal_tag>
......
...@@ -42,16 +42,38 @@ ...@@ -42,16 +42,38 @@
namespace folly { namespace folly {
/**
 * Probe functor implementing linear probing: each call steps to the cell
 * directly after `idx`, wrapping back to the start of the array.
 *
 * The second argument (the probe count) is accepted only so that every probe
 * functor can be invoked with the same three arguments; it is intentionally
 * left unnamed to avoid unused-parameter warnings in every translation unit
 * that includes this header.
 *
 * @param idx       current cell index; the single subtraction below only
 *                  wraps correctly when idx < capacity on entry
 * @param capacity  number of cells in the array
 * @return the next cell index to examine
 */
struct AtomicHashArrayLinearProbeFcn
{
  inline size_t operator()(size_t idx,
                           size_t /* numProbes */,
                           size_t capacity) const {
    idx += 1; // linear probing
    // Avoid modulus because it's slow
    return LIKELY(idx < capacity) ? idx : (idx - capacity);
  }
};
/**
 * Probe functor implementing quadratic probing: the step taken from `idx`
 * equals the number of probes made so far, so a caller passing
 * numProbes = 1, 2, 3, ... lands at cumulative offsets 1, 3, 6, ... from its
 * starting cell.
 *
 * @param idx       current cell index
 * @param numProbes step size for this probe
 * @param capacity  number of cells in the array
 * @return the next cell index to examine
 *
 * The wrap-around is a single subtraction, so the result is only in range
 * when idx + numProbes < 2 * capacity.
 * NOTE(review): whether this sequence reaches every cell depends on the
 * capacity value -- confirm against how capacity is chosen.
 */
struct AtomicHashArrayQuadraticProbeFcn
{
  inline size_t operator()(size_t idx, size_t numProbes, size_t capacity) const {
    const size_t next = idx + numProbes; // quadratic probing
    if (LIKELY(next < capacity)) {
      return next;
    }
    // Avoid modulus because it's slow
    return next - capacity;
  }
};
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn = std::hash<KeyT>, class HashFcn = std::hash<KeyT>,
class EqualFcn = std::equal_to<KeyT>, class EqualFcn = std::equal_to<KeyT>,
class Allocator = std::allocator<char>> class Allocator = std::allocator<char>,
class ProbeFcn = AtomicHashArrayLinearProbeFcn>
class AtomicHashMap; class AtomicHashMap;
template <class KeyT, class ValueT, template <class KeyT, class ValueT,
class HashFcn = std::hash<KeyT>, class HashFcn = std::hash<KeyT>,
class EqualFcn = std::equal_to<KeyT>, class EqualFcn = std::equal_to<KeyT>,
class Allocator = std::allocator<char>> class Allocator = std::allocator<char>,
class ProbeFcn = AtomicHashArrayLinearProbeFcn>
class AtomicHashArray : boost::noncopyable { class AtomicHashArray : boost::noncopyable {
static_assert((std::is_convertible<KeyT,int32_t>::value || static_assert((std::is_convertible<KeyT,int32_t>::value ||
std::is_convertible<KeyT,int64_t>::value || std::is_convertible<KeyT,int64_t>::value ||
...@@ -240,13 +262,20 @@ class AtomicHashArray : boost::noncopyable { ...@@ -240,13 +262,20 @@ class AtomicHashArray : boost::noncopyable {
/* Private data and helper functions... */ /* Private data and helper functions... */
private: private:
friend class AtomicHashMap<KeyT, ValueT, HashFcn, EqualFcn, Allocator>; friend class AtomicHashMap<KeyT,
ValueT,
HashFcn,
EqualFcn,
Allocator,
ProbeFcn>;
struct SimpleRetT { size_t idx; bool success; struct SimpleRetT { size_t idx; bool success;
SimpleRetT(size_t i, bool s) : idx(i), success(s) {} SimpleRetT(size_t i, bool s) : idx(i), success(s) {}
SimpleRetT() = default; SimpleRetT() = default;
}; };
template <typename... ArgTs> template <typename... ArgTs>
SimpleRetT insertInternal(KeyT key, ArgTs&&... vCtorArgs); SimpleRetT insertInternal(KeyT key, ArgTs&&... vCtorArgs);
...@@ -307,12 +336,7 @@ class AtomicHashArray : boost::noncopyable { ...@@ -307,12 +336,7 @@ class AtomicHashArray : boost::noncopyable {
return LIKELY(probe < capacity_) ? probe : hashVal % capacity_; return LIKELY(probe < capacity_) ? probe : hashVal % capacity_;
} }
inline size_t probeNext(size_t idx, size_t /*numProbes*/) {
//idx += numProbes; // quadratic probing
idx += 1; // linear probing
// Avoid modulus because it's slow
return LIKELY(idx < capacity_) ? idx : (idx - capacity_);
}
}; // AtomicHashArray }; // AtomicHashArray
} // namespace folly } // namespace folly
......
This diff is collapsed.
...@@ -156,9 +156,10 @@ struct AtomicHashMapFullError : std::runtime_error { ...@@ -156,9 +156,10 @@ struct AtomicHashMapFullError : std::runtime_error {
}; };
template<class KeyT, class ValueT, template<class KeyT, class ValueT,
class HashFcn, class EqualFcn, class Allocator> class HashFcn, class EqualFcn, class Allocator, class ProbeFcn>
class AtomicHashMap : boost::noncopyable { class AtomicHashMap : boost::noncopyable {
typedef AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator> SubMap; typedef AtomicHashArray<KeyT, ValueT, HashFcn, EqualFcn, Allocator, ProbeFcn>
SubMap;
public: public:
typedef KeyT key_type; typedef KeyT key_type;
...@@ -422,6 +423,18 @@ class AtomicHashMap : boost::noncopyable { ...@@ -422,6 +423,18 @@ class AtomicHashMap : boost::noncopyable {
}; // AtomicHashMap }; // AtomicHashMap
/**
 * Convenience alias for an AtomicHashMap whose probe functor is fixed to
 * AtomicHashArrayQuadraticProbeFcn. The remaining template parameters keep
 * the defaults spelled out here (std::hash, std::equal_to,
 * std::allocator<char>).
 */
template <class KeyT, class ValueT,
          class HashFcn = std::hash<KeyT>,
          class EqualFcn = std::equal_to<KeyT>,
          class Allocator = std::allocator<char>>
using QuadraticProbingAtomicHashMap = AtomicHashMap<
    KeyT, ValueT, HashFcn, EqualFcn, Allocator,
    AtomicHashArrayQuadraticProbeFcn>;
} // namespace folly } // namespace folly
#include <folly/AtomicHashMap-inl.h> #include <folly/AtomicHashMap-inl.h>
......
...@@ -96,10 +96,13 @@ pair<KeyT,ValueT> createEntry(int i) { ...@@ -96,10 +96,13 @@ pair<KeyT,ValueT> createEntry(int i) {
to<ValueT>(i + 3)); to<ValueT>(i + 3));
} }
template<class KeyT, class ValueT, class Allocator = std::allocator<char>> template <class KeyT,
class ValueT,
class Allocator = std::allocator<char>,
class ProbeFcn = AtomicHashArrayLinearProbeFcn>
void testMap() { void testMap() {
typedef AtomicHashArray<KeyT, ValueT, std::hash<KeyT>, typedef AtomicHashArray<KeyT, ValueT, std::hash<KeyT>,
std::equal_to<KeyT>, Allocator> MyArr; std::equal_to<KeyT>, Allocator, ProbeFcn> MyArr;
auto arr = MyArr::create(150); auto arr = MyArr::create(150);
map<KeyT, ValueT> ref; map<KeyT, ValueT> ref;
for (int i = 0; i < 100; ++i) { for (int i = 0; i < 100; ++i) {
...@@ -144,10 +147,13 @@ void testMap() { ...@@ -144,10 +147,13 @@ void testMap() {
} }
} }
template<class KeyT, class ValueT, class Allocator = std::allocator<char>> template<class KeyT, class ValueT,
class Allocator = std::allocator<char>,
class ProbeFcn = AtomicHashArrayLinearProbeFcn>
void testNoncopyableMap() { void testNoncopyableMap() {
typedef AtomicHashArray<KeyT, std::unique_ptr<ValueT>, std::hash<KeyT>, typedef AtomicHashArray<KeyT, std::unique_ptr<ValueT>, std::hash<KeyT>,
std::equal_to<KeyT>, Allocator> MyArr; std::equal_to<KeyT>, Allocator, ProbeFcn> MyArr;
auto arr = MyArr::create(250); auto arr = MyArr::create(250);
for (int i = 0; i < 100; i++) { for (int i = 0; i < 100; i++) {
arr->insert(make_pair(i,std::unique_ptr<ValueT>(new ValueT(i)))); arr->insert(make_pair(i,std::unique_ptr<ValueT>(new ValueT(i))));
...@@ -168,34 +174,74 @@ void testNoncopyableMap() { ...@@ -168,34 +174,74 @@ void testNoncopyableMap() {
TEST(Aha, InsertErase_i32_i32) { TEST(Aha, InsertErase_i32_i32) {
testMap<int32_t, int32_t>(); testMap<int32_t, int32_t>();
testMap<int32_t, int32_t, MmapAllocator<char>>(); testMap<int32_t, int32_t, MmapAllocator<char>>();
testMap<int32_t, int32_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testMap<int32_t, int32_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int32_t, int32_t>(); testNoncopyableMap<int32_t, int32_t>();
testNoncopyableMap<int32_t, int32_t, MmapAllocator<char>>(); testNoncopyableMap<int32_t, int32_t, MmapAllocator<char>>();
testNoncopyableMap<int32_t, int32_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int32_t, int32_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
} }
TEST(Aha, InsertErase_i64_i32) { TEST(Aha, InsertErase_i64_i32) {
testMap<int64_t, int32_t>(); testMap<int64_t, int32_t>();
testMap<int64_t, int32_t, MmapAllocator<char>>(); testMap<int64_t, int32_t, MmapAllocator<char>>();
testMap<int64_t, int32_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testMap<int64_t, int32_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int64_t, int32_t>(); testNoncopyableMap<int64_t, int32_t>();
testNoncopyableMap<int64_t, int32_t, MmapAllocator<char>>(); testNoncopyableMap<int64_t, int32_t, MmapAllocator<char>>();
testNoncopyableMap<int64_t, int32_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int64_t, int32_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
} }
TEST(Aha, InsertErase_i64_i64) { TEST(Aha, InsertErase_i64_i64) {
testMap<int64_t, int64_t>(); testMap<int64_t, int64_t>();
testMap<int64_t, int64_t, MmapAllocator<char>>(); testMap<int64_t, int64_t, MmapAllocator<char>>();
testMap<int64_t, int64_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testMap<int64_t, int64_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int64_t, int64_t>(); testNoncopyableMap<int64_t, int64_t>();
testNoncopyableMap<int64_t, int64_t, MmapAllocator<char>>(); testNoncopyableMap<int64_t, int64_t, MmapAllocator<char>>();
testNoncopyableMap<int64_t, int64_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int64_t, int64_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
} }
TEST(Aha, InsertErase_i32_i64) { TEST(Aha, InsertErase_i32_i64) {
testMap<int32_t, int64_t>(); testMap<int32_t, int64_t>();
testMap<int32_t, int64_t, MmapAllocator<char>>(); testMap<int32_t, int64_t, MmapAllocator<char>>();
testMap<int32_t, int64_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testMap<int32_t, int64_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int32_t, int64_t>(); testNoncopyableMap<int32_t, int64_t>();
testNoncopyableMap<int32_t, int64_t, MmapAllocator<char>>(); testNoncopyableMap<int32_t, int64_t, MmapAllocator<char>>();
testNoncopyableMap<int32_t, int64_t,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testNoncopyableMap<int32_t, int64_t,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
} }
TEST(Aha, InsertErase_i32_str) { TEST(Aha, InsertErase_i32_str) {
testMap<int32_t, string>(); testMap<int32_t, string>();
testMap<int32_t, string, MmapAllocator<char>>(); testMap<int32_t, string, MmapAllocator<char>>();
testMap<int32_t, string,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testMap<int32_t, string,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
} }
TEST(Aha, InsertErase_i64_str) { TEST(Aha, InsertErase_i64_str) {
testMap<int64_t, string>(); testMap<int64_t, string>();
testMap<int64_t, string, MmapAllocator<char>>(); testMap<int64_t, string, MmapAllocator<char>>();
testMap<int64_t, string,
std::allocator<char>, AtomicHashArrayQuadraticProbeFcn>();
testMap<int64_t, string,
MmapAllocator<char>, AtomicHashArrayQuadraticProbeFcn>();
} }
TEST(Aha, Create_cstr_i64) { TEST(Aha, Create_cstr_i64) {
......
...@@ -101,10 +101,12 @@ typedef int32_t ValueT; ...@@ -101,10 +101,12 @@ typedef int32_t ValueT;
typedef AtomicHashMap<KeyT,ValueT> AHMapT; typedef AtomicHashMap<KeyT,ValueT> AHMapT;
typedef AHMapT::value_type RecordT; typedef AHMapT::value_type RecordT;
typedef AtomicHashArray<KeyT,ValueT> AHArrayT; typedef AtomicHashArray<KeyT,ValueT> AHArrayT;
AHArrayT::Config config; AHArrayT::Config config;
typedef folly::QuadraticProbingAtomicHashMap<KeyT,ValueT> QPAHMapT;
QPAHMapT::Config qpConfig;
static AHArrayT::SmartPtr globalAHA(nullptr); static AHArrayT::SmartPtr globalAHA(nullptr);
static std::unique_ptr<AHMapT> globalAHM; static std::unique_ptr<AHMapT> globalAHM;
static std::unique_ptr<QPAHMapT> globalQPAHM;
// Generate a deterministic value based on an input key // Generate a deterministic value based on an input key
static int genVal(int key) { static int genVal(int key) {
...@@ -353,6 +355,15 @@ void* insertThread(void* jj) { ...@@ -353,6 +355,15 @@ void* insertThread(void* jj) {
return nullptr; return nullptr;
} }
void* qpInsertThread(void* jj) {
int64_t j = (int64_t) jj;
for (int i = 0; i < numOpsPerThread; ++i) {
KeyT key = randomizeKey(i + j * numOpsPerThread);
globalQPAHM->insert(key, genVal(key));
}
return nullptr;
}
void* insertThreadArr(void* jj) { void* insertThreadArr(void* jj) {
int64_t j = (int64_t) jj; int64_t j = (int64_t) jj;
for (int i = 0; i < numOpsPerThread; ++i) { for (int i = 0; i < numOpsPerThread; ++i) {
...@@ -715,6 +726,19 @@ void loadGlobalAhm() { ...@@ -715,6 +726,19 @@ void loadGlobalAhm() {
EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements); EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements);
} }
// (Re)creates globalQPAHM sized by maxBMElements with qpConfig, fills it by
// running qpInsertThread through runThreads, prints the elapsed time, and
// checks that the resulting size equals FLAGS_numBMElements.
void loadGlobalQPAhm() {
  std::cout << "loading global QPAHM with " << FLAGS_numThreads
            << " threads...\n";
  const uint64_t startUsec = nowInUsec();
  globalQPAHM.reset(new QPAHMapT(maxBMElements, qpConfig));
  // Work is split evenly across threads (integer division).
  numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads;
  runThreads(qpInsertThread);
  const uint64_t elapsedUsec = nowInUsec() - startUsec;
  const auto elapsedMs = elapsedUsec / 1000;
  const auto nsPerInsert = elapsedUsec * 1000 / FLAGS_numBMElements;
  std::cout << " took " << elapsedMs << " ms (" << nsPerInsert
            << " ns/insert).\n";
  EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements);
}
} }
BENCHMARK(st_aha_find, iters) { BENCHMARK(st_aha_find, iters) {
...@@ -733,6 +757,14 @@ BENCHMARK(st_ahm_find, iters) { ...@@ -733,6 +757,14 @@ BENCHMARK(st_ahm_find, iters) {
} }
} }
// Single-threaded lookup benchmark on globalQPAHM: looks up
// randomizeKey(0 .. iters-1) and reads the mapped value of each result.
// The iterator returned by find() is dereferenced without an end() check,
// so every key is assumed to be present in the map.
BENCHMARK(st_qpahm_find, iters) {
  CHECK_LE(iters, FLAGS_numBMElements);
  size_t n = 0;
  while (n < iters) {
    const KeyT key = randomizeKey(n);
    auto hit = globalQPAHM->find(key);
    folly::doNotOptimizeAway(hit->second);
    ++n;
  }
}
BENCHMARK_DRAW_LINE() BENCHMARK_DRAW_LINE()
BENCHMARK(mt_ahm_miss, iters) { BENCHMARK(mt_ahm_miss, iters) {
...@@ -749,6 +781,20 @@ BENCHMARK(mt_ahm_miss, iters) { ...@@ -749,6 +781,20 @@ BENCHMARK(mt_ahm_miss, iters) {
}); });
} }
// Multi-threaded lookup benchmark on globalQPAHM using keys of the form
// i + index * numOpsPerThread * 100 (not passed through randomizeKey); each
// lookup result is compared against end().
// NOTE(review): the name says "miss", but nothing visible here guarantees
// these keys are absent from the map -- confirm.
BENCHMARK(mt_qpahm_miss, iters) {
  CHECK_LE(iters, FLAGS_numBMElements);
  numOpsPerThread = iters / FLAGS_numThreads;
  runThreads([](void* jj) -> void* {
    const int64_t threadIdx = reinterpret_cast<int64_t>(jj);
    // Spin until the flag reports that all threads have been created.
    while (!runThreadsCreatedAllThreads.load()) {
    }
    int opIdx = 0;
    while (opIdx < numOpsPerThread) {
      const KeyT key = opIdx + threadIdx * numOpsPerThread * 100;
      folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
      ++opIdx;
    }
    return nullptr;
  });
}
BENCHMARK(st_ahm_miss, iters) { BENCHMARK(st_ahm_miss, iters) {
CHECK_LE(iters, FLAGS_numBMElements); CHECK_LE(iters, FLAGS_numBMElements);
for (size_t i = 0; i < iters; i++) { for (size_t i = 0; i < iters; i++) {
...@@ -757,6 +803,14 @@ BENCHMARK(st_ahm_miss, iters) { ...@@ -757,6 +803,14 @@ BENCHMARK(st_ahm_miss, iters) {
} }
} }
// Single-threaded lookup benchmark on globalQPAHM using keys
// randomizeKey(i + iters * 100); each lookup result is compared against
// end().
// NOTE(review): presumably these keys fall outside the loaded range so the
// lookups miss -- confirm against how the map is populated.
BENCHMARK(st_qpahm_miss, iters) {
  CHECK_LE(iters, FLAGS_numBMElements);
  size_t n = 0;
  while (n < iters) {
    const KeyT key = randomizeKey(n + iters * 100);
    folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end());
    ++n;
  }
}
BENCHMARK(mt_ahm_find_insert_mix, iters) { BENCHMARK(mt_ahm_find_insert_mix, iters) {
CHECK_LE(iters, FLAGS_numBMElements); CHECK_LE(iters, FLAGS_numBMElements);
numOpsPerThread = iters / FLAGS_numThreads; numOpsPerThread = iters / FLAGS_numThreads;
...@@ -776,6 +830,26 @@ BENCHMARK(mt_ahm_find_insert_mix, iters) { ...@@ -776,6 +830,26 @@ BENCHMARK(mt_ahm_find_insert_mix, iters) {
}); });
} }
// Multi-threaded mixed workload on globalQPAHM: every 128th operation of a
// thread (i % 128 == 0) is an insert, all others are lookups whose mapped
// value is read without an end() check.
BENCHMARK(mt_qpahm_find_insert_mix, iters) {
  CHECK_LE(iters, FLAGS_numBMElements);
  numOpsPerThread = iters / FLAGS_numThreads;
  runThreads([](void* jj) -> void* {
    const int64_t threadIdx = reinterpret_cast<int64_t>(jj);
    // Spin until the flag reports that all threads have been created.
    while (!runThreadsCreatedAllThreads.load()) {
    }
    for (int opIdx = 0; opIdx < numOpsPerThread; ++opIdx) {
      const bool doInsert = (opIdx % 128 == 0); // ~1% insert mix
      if (doInsert) {
        const KeyT key = randomizeKey(opIdx + threadIdx * numOpsPerThread * 100);
        globalQPAHM->insert(key, genVal(key));
        continue;
      }
      const KeyT key = randomizeKey(opIdx + threadIdx * numOpsPerThread);
      folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
    }
    return nullptr;
  });
}
BENCHMARK(mt_aha_find, iters) { BENCHMARK(mt_aha_find, iters) {
CHECK_LE(iters, FLAGS_numBMElements); CHECK_LE(iters, FLAGS_numBMElements);
numOpsPerThread = iters / FLAGS_numThreads; numOpsPerThread = iters / FLAGS_numThreads;
...@@ -804,6 +878,20 @@ BENCHMARK(mt_ahm_find, iters) { ...@@ -804,6 +878,20 @@ BENCHMARK(mt_ahm_find, iters) {
}); });
} }
// Multi-threaded lookup benchmark on globalQPAHM: each thread looks up
// randomizeKey(i + index * numOpsPerThread) for numOpsPerThread values of i
// and reads the mapped value. Results are dereferenced without an end()
// check, so every key is assumed to be present.
BENCHMARK(mt_qpahm_find, iters) {
  CHECK_LE(iters, FLAGS_numBMElements);
  numOpsPerThread = iters / FLAGS_numThreads;
  runThreads([](void* jj) -> void* {
    const int64_t threadIdx = reinterpret_cast<int64_t>(jj);
    // Spin until the flag reports that all threads have been created.
    while (!runThreadsCreatedAllThreads.load()) {
    }
    int opIdx = 0;
    while (opIdx < numOpsPerThread) {
      const KeyT key = randomizeKey(opIdx + threadIdx * numOpsPerThread);
      folly::doNotOptimizeAway(globalQPAHM->find(key)->second);
      ++opIdx;
    }
    return nullptr;
  });
}
KeyT k; KeyT k;
BENCHMARK(st_baseline_modulus_and_random, iters) { BENCHMARK(st_baseline_modulus_and_random, iters) {
for (size_t i = 0; i < iters; ++i) { for (size_t i = 0; i < iters; ++i) {
...@@ -821,6 +909,14 @@ BENCHMARK(mt_ahm_insert, iters) { ...@@ -821,6 +909,14 @@ BENCHMARK(mt_ahm_insert, iters) {
runThreads(insertThread); runThreads(insertThread);
} }
// Multi-threaded insert benchmark: replaces globalQPAHM with a fresh map
// sized int(iters * LF), then runs qpInsertThread through runThreads.
// The setup sits inside BENCHMARK_SUSPEND (presumably so it is excluded
// from the measured time -- confirm).
BENCHMARK(mt_qpahm_insert, iters) {
  BENCHMARK_SUSPEND {
    numOpsPerThread = iters / FLAGS_numThreads;
    globalQPAHM.reset(new QPAHMapT(static_cast<int>(iters * LF), qpConfig));
  }
  runThreads(qpInsertThread);
}
BENCHMARK(st_ahm_insert, iters) { BENCHMARK(st_ahm_insert, iters) {
folly::BenchmarkSuspender susp; folly::BenchmarkSuspender susp;
std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config)); std::unique_ptr<AHMapT> ahm(new AHMapT(int(iters * LF), config));
...@@ -832,12 +928,25 @@ BENCHMARK(st_ahm_insert, iters) { ...@@ -832,12 +928,25 @@ BENCHMARK(st_ahm_insert, iters) {
} }
} }
// Single-threaded insert benchmark: builds a local QPAHMapT sized
// int(iters * LF) and inserts randomizeKey(0 .. iters-1) with genVal(key)
// as the value. The map is constructed while a BenchmarkSuspender is active
// and dismiss() is called before the insert loop (presumably so only the
// inserts are timed -- confirm).
BENCHMARK(st_qpahm_insert, iters) {
  folly::BenchmarkSuspender susp;
  std::unique_ptr<QPAHMapT> qpMap(
      new QPAHMapT(static_cast<int>(iters * LF), qpConfig));
  susp.dismiss();
  size_t n = 0;
  while (n < iters) {
    const KeyT key = randomizeKey(n);
    qpMap->insert(key, genVal(key));
    ++n;
  }
}
void benchmarkSetup() { void benchmarkSetup() {
config.maxLoadFactor = FLAGS_maxLoadFactor; config.maxLoadFactor = FLAGS_maxLoadFactor;
qpConfig.maxLoadFactor = FLAGS_maxLoadFactor;
configRace.maxLoadFactor = 0.5; configRace.maxLoadFactor = 0.5;
int numCores = sysconf(_SC_NPROCESSORS_ONLN); int numCores = sysconf(_SC_NPROCESSORS_ONLN);
loadGlobalAha(); loadGlobalAha();
loadGlobalAhm(); loadGlobalAhm();
loadGlobalQPAhm();
string numIters = folly::to<string>( string numIters = folly::to<string>(
std::min(1000000, int(FLAGS_numBMElements))); std::min(1000000, int(FLAGS_numBMElements)));
...@@ -871,28 +980,38 @@ int main(int argc, char** argv) { ...@@ -871,28 +980,38 @@ int main(int argc, char** argv) {
} }
/* /*
Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled loading global AHA with 8 threads...
(12 physical cores, 12 MB cache, 72 GB RAM) took 487 ms (40 ns/insert).
loading global AHM with 8 threads...
took 478 ms (39 ns/insert).
loading global QPAHM with 8 threads...
took 478 ms (39 ns/insert).
Running AHM benchmarks on machine with 24 logical cores. Running AHM benchmarks on machine with 24 logical cores.
num elements per map: 12000000 num elements per map: 12000000
num threads for mt tests: 24 num threads for mt tests: 24
AHM load factor: 0.75 AHM load factor: 0.75
Benchmark Iters Total t t/iter iter/sec ============================================================================
------------------------------------------------------------------------------ folly/test/AtomicHashMapTest.cpp relative time/iter iters/s
Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find ============================================================================
* BM_mt_aha_find 1000000 7.767 ms 7.767 ns 122.8 M st_aha_find 92.63ns 10.80M
+0.81% BM_mt_ahm_find 1000000 7.83 ms 7.83 ns 121.8 M st_ahm_find 107.78ns 9.28M
------------------------------------------------------------------------------ st_qpahm_find 90.69ns 11.03M
Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find ----------------------------------------------------------------------------
* BM_st_aha_find 1000000 57.83 ms 57.83 ns 16.49 M mt_ahm_miss 2.09ns 477.36M
+77.9% BM_st_ahm_find 1000000 102.9 ms 102.9 ns 9.27 M mt_qpahm_miss 1.37ns 728.82M
------------------------------------------------------------------------------ st_ahm_miss 241.07ns 4.15M
BM_mt_ahm_miss 1000000 2.937 ms 2.937 ns 324.7 M st_qpahm_miss 223.17ns 4.48M
BM_st_ahm_miss 1000000 164.2 ms 164.2 ns 5.807 M mt_ahm_find_insert_mix 8.05ns 124.24M
BM_mt_ahm_find_insert_mix 1000000 8.797 ms 8.797 ns 108.4 M mt_qpahm_find_insert_mix 9.10ns 109.85M
BM_mt_ahm_insert 1000000 17.39 ms 17.39 ns 54.83 M mt_aha_find 6.82ns 146.68M
BM_st_ahm_insert 1000000 106.8 ms 106.8 ns 8.93 M mt_ahm_find 7.95ns 125.77M
BM_st_baseline_modulus_and_rando 1000000 6.223 ms 6.223 ns 153.2 M mt_qpahm_find 6.81ns 146.83M
st_baseline_modulus_and_random 6.02ns 166.03M
mt_ahm_insert 14.29ns 69.97M
mt_qpahm_insert 11.68ns 85.61M
st_ahm_insert 125.39ns 7.98M
st_qpahm_insert 128.76ns 7.77M
============================================================================
*/ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment