Commit d0889c8c authored by Nathan Bronson, committed by facebook-github-bot-4

templatize AtomicUnorderedInsertMap's internals to allow big maps

Summary: AtomicUnorderedInsertMap used 32-bit index values internally, and
2 of those bits were stolen for bucket state, limiting the capacity to 2^30.
This diff makes the internal index type a template parameter, so you can
build really big maps if you want (at the cost of higher per-slot memory
overhead).  The easiest way is to substitute AtomicUnorderedInsertMap64.

Reviewed By: yfeldblum

Differential Revision: D2574338

fb-gh-sync-id: a74994b6da1046a149c2e7763c3dc19e35d9840b
parent 69f3c942
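As a usage sketch of the two routes the summary mentions (an editorial illustration, not part of the commit; the header path and the example() wrapper are assumptions, and the explicit Hash/KeyEqual/Atom/Allocator arguments are simply the defaults spelled out from the template below):

    #include <atomic>
    #include <cstdint>
    #include <functional>
    #include <folly/AtomicUnorderedMap.h>  // assumed header location

    void example() {
      // Easiest route: the new alias, which fixes IndexType to uint64_t.
      // The capacity here is above 2^30 to show why 64-bit indexes matter.
      folly::AtomicUnorderedInsertMap64<uint64_t, uint64_t> big(3000000000ULL);
      big.emplace(1, 10);

      // Long form: fill in every template parameter so the trailing
      // IndexType can be set explicitly.
      folly::AtomicUnorderedInsertMap<
          uint64_t,                  // Key
          uint64_t,                  // Value
          std::hash<uint64_t>,       // Hash
          std::equal_to<uint64_t>,   // KeyEqual
          true,                      // SkipKeyValueDeletion
          std::atomic,               // Atom
          folly::detail::MMapAlloc,  // Allocator
          uint64_t>                  // IndexType
          alsoBig(3000000000ULL);
      alsoBig.emplace(2, 20);
    }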
@@ -55,10 +55,12 @@ namespace folly {
 /// O(1/(1-actual_load_factor)).  Note that this is a pretty strong
 /// limitation, because you can't remove existing keys.
 ///
-/// * 2^30 maximum capacity - you'll need to use something else if you
-///   have more than a billion entries.  If this limit bothers you let it
-///   wouldn't be too hard to parameterize the internal indexes between
-///   uint32_t and uint64_t.
+/// * 2^30 maximum default capacity - by default AtomicUnorderedInsertMap
+///   uses uint32_t internal indexes (and steals 2 bits), limiting you
+///   to about a billion entries.  If you need more you can fill in all
+///   of the template params so you change IndexType to uint64_t, or you
+///   can use AtomicUnorderedInsertMap64.  64-bit indexes will increase
+///   the space overhead of the map, of course.
 ///
 /// WHAT YOU GET IN EXCHANGE:
 ///
@@ -133,7 +135,8 @@ template <typename Key,
              (boost::has_trivial_destructor<Key>::value &&
               boost::has_trivial_destructor<Value>::value),
          template<typename> class Atom = std::atomic,
-         typename Allocator = folly::detail::MMapAlloc>
+         typename Allocator = folly::detail::MMapAlloc,
+         typename IndexType = uint32_t>
 struct AtomicUnorderedInsertMap {
@@ -147,7 +150,7 @@ struct AtomicUnorderedInsertMap {
   typedef const value_type& const_reference;
   typedef struct ConstIterator {
-    ConstIterator(const AtomicUnorderedInsertMap& owner, uint32_t slot)
+    ConstIterator(const AtomicUnorderedInsertMap& owner, IndexType slot)
       : owner_(owner)
       , slot_(slot)
     {}
@@ -190,17 +193,17 @@ struct AtomicUnorderedInsertMap {
    private:
     const AtomicUnorderedInsertMap& owner_;
-    uint32_t slot_;
+    IndexType slot_;
   } const_iterator;
   friend ConstIterator;
-  /// Constructs a map that will support the insertion of maxSize
-  /// key-value pairs without exceeding the max load factor.  Load
-  /// factors of greater than 1 are not supported, and once the actual load
-  /// factor of the map approaches 1 the insert performance will suffer.
-  /// The capacity is limited to 2^30 (about a billion), beyond which
-  /// we will throw invalid_argument.
+  /// Constructs a map that will support the insertion of maxSize key-value
+  /// pairs without exceeding the max load factor.  Load factors of greater
+  /// than 1 are not supported, and once the actual load factor of the
+  /// map approaches 1 the insert performance will suffer.  The capacity
+  /// is limited to 2^30 (about a billion) for the default IndexType,
+  /// beyond which we will throw invalid_argument.
   explicit AtomicUnorderedInsertMap(
       size_t maxSize,
       float maxLoadFactor = 0.8f,
@@ -208,13 +211,15 @@ struct AtomicUnorderedInsertMap {
     : allocator_(alloc)
   {
     size_t capacity = maxSize / std::max(1.0f, maxLoadFactor) + 128;
-    if (capacity > (1 << 30) && maxSize < (1 << 30)) {
+    size_t avail = size_t{1} << (8 * sizeof(IndexType) - 2);
+    if (capacity > avail && maxSize < avail) {
       // we'll do our best
-      capacity = (1 << 30);
+      capacity = avail;
     }
-    if (capacity < maxSize || capacity > (1 << 30)) {
+    if (capacity < maxSize || capacity > avail) {
       throw std::invalid_argument(
-          "AtomicUnorderedInsertMap capacity must fit in 30 bits");
+          "AtomicUnorderedInsertMap capacity must fit in IndexType with 2 bits "
+          "left over");
     }
     numSlots_ = capacity;
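A side note on the avail computation above (editorial, not part of the diff): the top two bits of IndexType are reserved for bucket state, so the capacity ceiling is 2^(8*sizeof(IndexType) - 2) slots. Worked out for the index types the tests below exercise:

    #include <cstdint>

    // avail = 1 << (8 * sizeof(IndexType) - 2); uint64_t{1} is used here so
    // the shift stays well-defined even where size_t is narrower than 64 bits.
    static_assert((uint64_t{1} << (8 * sizeof(uint16_t) - 2)) == 16384,
                  "uint16_t indexes: 2^14 slots");
    static_assert((uint64_t{1} << (8 * sizeof(uint32_t) - 2)) == (uint64_t{1} << 30),
                  "uint32_t indexes: 2^30 slots, about a billion");
    static_assert((uint64_t{1} << (8 * sizeof(uint64_t) - 2)) == (uint64_t{1} << 62),
                  "uint64_t indexes: 2^62 slots");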
@@ -319,7 +324,7 @@ struct AtomicUnorderedInsertMap {
   }
   const_iterator cbegin() const {
-    uint32_t slot = numSlots_ - 1;
+    IndexType slot = numSlots_ - 1;
     while (slot > 0 && slots_[slot].state() != LINKED) {
       --slot;
     }
@@ -336,7 +341,7 @@ struct AtomicUnorderedInsertMap {
     kMaxAllocationTries = 1000, // after this we throw
   };
-  enum BucketState : uint32_t {
+  enum BucketState : IndexType {
     EMPTY = 0,
     CONSTRUCTING = 1,
     LINKED = 2,
@@ -354,10 +359,10 @@ struct AtomicUnorderedInsertMap {
     /// of the first bucket for the chain whose keys map to this slot.
     /// When things are going well the head usually links to this slot,
     /// but that doesn't always have to happen.
-    Atom<uint32_t> headAndState_;
+    Atom<IndexType> headAndState_;
     /// The next bucket in the chain
-    uint32_t next_;
+    IndexType next_;
     /// Key and Value
     typename std::aligned_storage<sizeof(value_type),
@@ -407,7 +412,7 @@ struct AtomicUnorderedInsertMap {
   Allocator allocator_;
   Slot* slots_;
-  uint32_t keyToSlotIdx(const Key& key) const {
+  IndexType keyToSlotIdx(const Key& key) const {
     size_t h = hasher()(key);
     h &= slotMask_;
     while (h >= numSlots_) {
@@ -416,7 +421,7 @@ struct AtomicUnorderedInsertMap {
     return h;
   }
-  uint32_t find(const Key& key, uint32_t slot) const {
+  IndexType find(const Key& key, IndexType slot) const {
     KeyEqual ke = {};
     auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire);
     for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) {
@@ -429,7 +434,7 @@ struct AtomicUnorderedInsertMap {
   /// Allocates a slot and returns its index.  Tries to put it near
   /// slots_[start].
-  uint32_t allocateNear(uint32_t start) {
+  IndexType allocateNear(IndexType start) {
     for (auto tries = 0; tries < kMaxAllocationTries; ++tries) {
       auto slot = allocationAttempt(start, tries);
       auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
@@ -445,11 +450,16 @@ struct AtomicUnorderedInsertMap {
   /// Returns the slot we should attempt to allocate after tries failed
   /// tries, starting from the specified slot.  This is pulled out so we
   /// can specialize it differently during deterministic testing
-  uint32_t allocationAttempt(uint32_t start, uint32_t tries) const {
+  IndexType allocationAttempt(IndexType start, IndexType tries) const {
     if (LIKELY(tries < 8 && start + tries < numSlots_)) {
       return start + tries;
     } else {
-      uint32_t rv = folly::Random::rand32(numSlots_);
+      IndexType rv;
+      if (sizeof(IndexType) <= 4) {
+        rv = folly::Random::rand32(numSlots_);
+      } else {
+        rv = folly::Random::rand64(numSlots_);
+      }
       assert(rv < numSlots_);
       return rv;
     }
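Worth noting about the branch above (editorial aside): sizeof(IndexType) is a compile-time constant, so only one arm ever runs per instantiation, but both arms must still compile for every IndexType; rand64 is needed because a 64-bit map can have more than 2^32 slots. A standalone sketch of the same idea using only the standard library (randomSlot is a hypothetical helper, not folly code):

    #include <cstdint>
    #include <random>

    // Draw a uniformly distributed slot index in [0, numSlots) with a 64-bit
    // generator, so slots past 2^32 remain reachable for 64-bit index types.
    template <typename IndexType>
    IndexType randomSlot(IndexType numSlots) {
      static thread_local std::mt19937_64 rng{std::random_device{}()};
      std::uniform_int_distribution<IndexType> dist(0, numSlots - 1);
      return dist(rng);
    }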
@@ -463,6 +473,29 @@ struct AtomicUnorderedInsertMap {
   }
 };
+/// AtomicUnorderedInsertMap64 is just a type alias that makes it easier
+/// to select a 64 bit slot index type.  Use this if you need a capacity
+/// bigger than 2^30 (about a billion).  This increases memory overheads,
+/// obviously.
+template <typename Key,
+          typename Value,
+          typename Hash = std::hash<Key>,
+          typename KeyEqual = std::equal_to<Key>,
+          bool SkipKeyValueDeletion =
+              (boost::has_trivial_destructor<Key>::value &&
+               boost::has_trivial_destructor<Value>::value),
+          template <typename> class Atom = std::atomic,
+          typename Allocator = folly::detail::MMapAlloc>
+using AtomicUnorderedInsertMap64 =
+    AtomicUnorderedInsertMap<Key,
+                             Value,
+                             Hash,
+                             KeyEqual,
+                             SkipKeyValueDeletion,
+                             Atom,
+                             Allocator,
+                             uint64_t>;
 /// MutableAtom is a tiny wrapper that gives you the option of atomically
 /// updating values inserted into an AtomicUnorderedInsertMap<K,
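On the "increases memory overheads" remark above (editorial aside; the exact figures depend on the Slot layout, padding, and value size, which are not fully shown in this hunk): each slot carries two index-typed fields, headAndState_ and next_, so the index portion of every slot doubles when IndexType moves from uint32_t to uint64_t.

    #include <cstdint>

    // Index bytes per slot (headAndState_ + next_), ignoring value storage,
    // padding, and any alignment the real Slot struct applies.
    static_assert(2 * sizeof(uint32_t) == 8,  "32-bit indexes: 8 index bytes per slot");
    static_assert(2 * sizeof(uint64_t) == 16, "64-bit indexes: 16 index bytes per slot");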
...
@@ -81,20 +81,38 @@ struct non_atomic {
   bool is_lock_free() const {return true;}
 };
-template<
-    typename Key, typename Value, template<typename> class Atom = non_atomic>
-using UnorderedInsertMap = AtomicUnorderedInsertMap<
-    Key,
-    Value,
-    std::hash<Key>,
-    std::equal_to<Key>,
-    (boost::has_trivial_destructor<Key>::value &&
-     boost::has_trivial_destructor<Value>::value),
-    Atom,
-    std::allocator<char>>;
-TEST(AtomicUnorderedInsertMap, basic) {
-  AtomicUnorderedInsertMap<std::string,std::string> m(100);
+template <typename Key,
+          typename Value,
+          typename IndexType,
+          template <typename> class Atom = std::atomic,
+          typename Allocator = std::allocator<char>>
+using UIM =
+    AtomicUnorderedInsertMap<Key,
+                             Value,
+                             std::hash<Key>,
+                             std::equal_to<Key>,
+                             (boost::has_trivial_destructor<Key>::value &&
+                              boost::has_trivial_destructor<Value>::value),
+                             Atom,
+                             Allocator,
+                             IndexType>;
+namespace {
+template <typename T>
+struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
+}
+// uint16_t doesn't make sense for most platforms, but we might as well
+// test it
+using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
+TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
+TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
+  UIM<std::string,
+      std::string,
+      TypeParam,
+      std::atomic,
+      folly::detail::MMapAlloc> m(100);
   m.emplace("abc", "ABC");
   EXPECT_TRUE(m.find("abc") != m.cend());
@@ -116,8 +134,8 @@ TEST(AtomicUnorderedInsertMap, basic) {
   EXPECT_TRUE(a != b);
 }
-TEST(AtomicUnorderedInsertMap, value_mutation) {
-  AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
+TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
+  UIM<int, MutableAtom<int>, TypeParam> m(100);
   for (int i = 0; i < 50; ++i) {
     m.emplace(i, i);
@@ -127,7 +145,7 @@ TEST(AtomicUnorderedInsertMap, value_mutation) {
 }
 TEST(UnorderedInsertMap, value_mutation) {
-  UnorderedInsertMap<int, MutableData<int>> m(100);
+  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);
   for (int i = 0; i < 50; ++i) {
     m.emplace(i, i);
@@ -137,6 +155,24 @@ TEST(UnorderedInsertMap, value_mutation) {
   EXPECT_EQ(m.find(1)->second.data, 2);
 }
+// This test is too expensive to run automatically.  On my dev server it
+// takes about 10 minutes for dbg build, 2 for opt.
+TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
+  size_t capacity = 2000000000;
+  AtomicUnorderedInsertMap64<size_t,size_t> big(capacity);
+  for (size_t i = 0; i < capacity * 2; i += 2) {
+    big.emplace(i, i * 10);
+  }
+  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
+    auto iter = big.find(i);
+    if ((i & 1) == 0 && i < capacity * 2) {
+      EXPECT_EQ(iter->second, i * 10);
+    } else {
+      EXPECT_TRUE(iter == big.cend());
+    }
+  }
+}
 BENCHMARK(lookup_int_int_hit, iters) {
   std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
@@ -283,7 +319,7 @@ BENCHMARK(std_map) {
 }
 BENCHMARK(atomic_fast_map) {
-  UnorderedInsertMap<long, long, std::atomic> m(10000);
+  UIM<long, long, uint32_t, std::atomic> m(10000);
   for (int i=0; i<10000; ++i) {
     m.emplace(i,i);
   }
@@ -295,7 +331,31 @@ BENCHMARK(atomic_fast_map) {
 }
 BENCHMARK(fast_map) {
-  UnorderedInsertMap<long, long> m(10000);
+  UIM<long, long, uint32_t, non_atomic> m(10000);
+  for (int i=0; i<10000; ++i) {
+    m.emplace(i,i);
+  }
+  for (int i=0; i<10000; ++i) {
+    auto a = m.find(i);
+    folly::doNotOptimizeAway(&*a);
+  }
+}
+BENCHMARK(atomic_fast_map_64) {
+  UIM<long, long, uint64_t, std::atomic> m(10000);
+  for (int i=0; i<10000; ++i) {
+    m.emplace(i,i);
+  }
+  for (int i=0; i<10000; ++i) {
+    auto a = m.find(i);
+    folly::doNotOptimizeAway(&*a);
+  }
+}
+BENCHMARK(fast_map_64) {
+  UIM<long, long, uint64_t, non_atomic> m(10000);
   for (int i=0; i<10000; ++i) {
     m.emplace(i,i);
   }
...