Commit d0889c8c authored by Nathan Bronson, committed by facebook-github-bot-4

templatize AtomicUnorderedInsertMap's internals to allow big maps

Summary: AtomicUnorderedInsertMap used 32 bit index values internally.  2 bits
were stolen, limiting the capacity to 2^30.  This diff makes the internal
index type a template parameter, so you can make really big maps if you
want (at the expense of bigger per-entry overhead). The easiest way is to
substitute AtomicUnorderedInsertMap64.
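
A minimal usage sketch of that substitution (the key/value types and sizes here are illustrative, not part of the commit):

    #include <folly/AtomicUnorderedMap.h>

    // Default alias: uint32_t internal indexes with 2 bits stolen,
    // so capacity tops out near 2^30 entries.
    folly::AtomicUnorderedInsertMap<int, int> small(1000000);

    // 64-bit variant: identical interface, but IndexType = uint64_t
    // internally, allowing capacities beyond 2^30 at higher per-slot cost.
    folly::AtomicUnorderedInsertMap64<size_t, size_t> big(3000000000ULL);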

Reviewed By: yfeldblum

Differential Revision: D2574338

fb-gh-sync-id: a74994b6da1046a149c2e7763c3dc19e35d9840b
parent 69f3c942
@@ -55,10 +55,12 @@ namespace folly {
/// O(1/(1-actual_load_factor)). Note that this is a pretty strong
/// limitation, because you can't remove existing keys.
///
/// * 2^30 maximum capacity - you'll need to use something else if you
/// have more than a billion entries. If this limit bothers you, let us
/// know; it wouldn't be too hard to parameterize the internal indexes
/// between uint32_t and uint64_t.
/// * 2^30 maximum default capacity - by default AtomicUnorderedInsertMap
/// uses uint32_t internal indexes (and steals 2 bits), limiting you
/// to about a billion entries. If you need more you can fill in all
/// of the template params so you change IndexType to uint64_t, or you
/// can use AtomicUnorderedInsertMap64. 64-bit indexes will increase
/// the space overhead of the map, of course.
///
/// WHAT YOU GET IN EXCHANGE:
///
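
To make that concrete, here is a sketch of the "fill in all of the template params" option the comment mentions (the key/value/functor choices are illustrative, not taken from this diff):

    // Every parameter spelled out; only the final IndexType differs
    // from the defaults.
    using BigMap = folly::AtomicUnorderedInsertMap<
        std::string,                // Key
        int,                        // Value
        std::hash<std::string>,     // Hash
        std::equal_to<std::string>, // KeyEqual
        false,                      // SkipKeyValueDeletion (std::string is non-trivial)
        std::atomic,                // Atom
        folly::detail::MMapAlloc,   // Allocator
        uint64_t>;                  // IndexType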
@@ -133,7 +135,8 @@ template <typename Key,
(boost::has_trivial_destructor<Key>::value &&
boost::has_trivial_destructor<Value>::value),
template<typename> class Atom = std::atomic,
typename Allocator = folly::detail::MMapAlloc>
typename Allocator = folly::detail::MMapAlloc,
typename IndexType = uint32_t>
struct AtomicUnorderedInsertMap {
@@ -147,7 +150,7 @@ struct AtomicUnorderedInsertMap {
typedef const value_type& const_reference;
typedef struct ConstIterator {
ConstIterator(const AtomicUnorderedInsertMap& owner, uint32_t slot)
ConstIterator(const AtomicUnorderedInsertMap& owner, IndexType slot)
: owner_(owner)
, slot_(slot)
{}
@@ -190,17 +193,17 @@ struct AtomicUnorderedInsertMap {
private:
const AtomicUnorderedInsertMap& owner_;
uint32_t slot_;
IndexType slot_;
} const_iterator;
friend ConstIterator;
/// Constructs a map that will support the insertion of maxSize
/// key-value pairs without exceeding the max load factor. Load
/// factors of greater than 1 are not supported, and once the actual load
/// factor of the map approaches 1 the insert performance will suffer.
/// The capacity is limited to 2^30 (about a billion), beyond which
/// we will throw invalid_argument.
/// Constructs a map that will support the insertion of maxSize key-value
/// pairs without exceeding the max load factor. Load factors of greater
/// than 1 are not supported, and once the actual load factor of the
/// map approaches 1 the insert performance will suffer. The capacity
/// is limited to 2^30 (about a billion) for the default IndexType,
/// beyond which we will throw invalid_argument.
explicit AtomicUnorderedInsertMap(
size_t maxSize,
float maxLoadFactor = 0.8f,
@@ -208,13 +211,15 @@ struct AtomicUnorderedInsertMap {
: allocator_(alloc)
{
size_t capacity = maxSize / std::min(1.0f, maxLoadFactor) + 128;
if (capacity > (1 << 30) && maxSize < (1 << 30)) {
size_t avail = size_t{1} << (8 * sizeof(IndexType) - 2);
if (capacity > avail && maxSize < avail) {
// we'll do our best
capacity = (1 << 30);
capacity = avail;
}
if (capacity < maxSize || capacity > (1 << 30)) {
if (capacity < maxSize || capacity > avail) {
throw std::invalid_argument(
"AtomicUnorderedInsertMap capacity must fit in 30 bits");
"AtomicUnorderedInsertMap capacity must fit in IndexType with 2 bits "
"left over");
}
numSlots_ = capacity;
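
As a worked example of the capacity check (numbers are illustrative, assuming the default maxLoadFactor of 0.8):

    // IndexType = uint32_t: avail = size_t{1} << (8 * 4 - 2) = 2^30 ≈ 1.07e9.
    //   maxSize = 1'000'000     -> capacity ≈ 1'250'128 slots, well under avail.
    //   maxSize = 2'000'000'000 -> capacity ≈ 2.5e9; since maxSize also
    //   exceeds avail there is no clamping, and invalid_argument is thrown.
    // IndexType = uint64_t: avail = 2^62, so the same request succeeds.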
@@ -319,7 +324,7 @@ struct AtomicUnorderedInsertMap {
}
const_iterator cbegin() const {
uint32_t slot = numSlots_ - 1;
IndexType slot = numSlots_ - 1;
while (slot > 0 && slots_[slot].state() != LINKED) {
--slot;
}
@@ -336,7 +341,7 @@ struct AtomicUnorderedInsertMap {
kMaxAllocationTries = 1000, // after this we throw
};
enum BucketState : uint32_t {
enum BucketState : IndexType {
EMPTY = 0,
CONSTRUCTING = 1,
LINKED = 2,
@@ -354,10 +359,10 @@ struct AtomicUnorderedInsertMap {
/// of the first bucket for the chain whose keys map to this slot.
/// When things are going well the head usually links to this slot,
/// but that doesn't always have to happen.
Atom<uint32_t> headAndState_;
Atom<IndexType> headAndState_;
/// The next bucket in the chain
uint32_t next_;
IndexType next_;
/// Key and Value
typename std::aligned_storage<sizeof(value_type),
@@ -407,7 +412,7 @@ struct AtomicUnorderedInsertMap {
Allocator allocator_;
Slot* slots_;
uint32_t keyToSlotIdx(const Key& key) const {
IndexType keyToSlotIdx(const Key& key) const {
size_t h = hasher()(key);
h &= slotMask_;
while (h >= numSlots_) {
@@ -416,7 +421,7 @@ struct AtomicUnorderedInsertMap {
return h;
}
uint32_t find(const Key& key, uint32_t slot) const {
IndexType find(const Key& key, IndexType slot) const {
KeyEqual ke = {};
auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire);
for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) {
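
The hunk above is cut off mid-loop, but `hs >> 2` is where the two stolen bits pay off: headAndState_ keeps the BucketState in the low 2 bits and the index of the chain head in the remaining bits. A sketch of that packing (the helper names are hypothetical, not part of the diff):

    // Low 2 bits: BucketState (EMPTY / CONSTRUCTING / LINKED).
    // High bits:  index of the first bucket in this slot's chain.
    IndexType stateOf(IndexType hs) const { return hs & 3; }
    IndexType headOf(IndexType hs) const { return hs >> 2; }
    // This packing is why a uint32_t IndexType yields the 2^30 slot limit.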
@@ -429,7 +434,7 @@ struct AtomicUnorderedInsertMap {
/// Allocates a slot and returns its index. Tries to put it near
/// slots_[start].
uint32_t allocateNear(uint32_t start) {
IndexType allocateNear(IndexType start) {
for (auto tries = 0; tries < kMaxAllocationTries; ++tries) {
auto slot = allocationAttempt(start, tries);
auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
@@ -445,11 +450,16 @@ struct AtomicUnorderedInsertMap {
/// Returns the slot we should attempt to allocate after `tries` failed
/// attempts, starting from the specified slot. This is pulled out so we
/// can specialize it differently during deterministic testing.
uint32_t allocationAttempt(uint32_t start, uint32_t tries) const {
IndexType allocationAttempt(IndexType start, IndexType tries) const {
if (LIKELY(tries < 8 && start + tries < numSlots_)) {
return start + tries;
} else {
uint32_t rv = folly::Random::rand32(numSlots_);
IndexType rv;
if (sizeof(IndexType) <= 4) {
rv = folly::Random::rand32(numSlots_);
} else {
rv = folly::Random::rand64(numSlots_);
}
assert(rv < numSlots_);
return rv;
}
@@ -463,6 +473,29 @@ struct AtomicUnorderedInsertMap {
}
};
/// AtomicUnorderedInsertMap64 is just a type alias that makes it easier
/// to select a 64 bit slot index type. Use this if you need a capacity
/// bigger than 2^30 (about a billion). This increases memory overheads,
/// obviously.
template <typename Key,
typename Value,
typename Hash = std::hash<Key>,
typename KeyEqual = std::equal_to<Key>,
bool SkipKeyValueDeletion =
(boost::has_trivial_destructor<Key>::value &&
boost::has_trivial_destructor<Value>::value),
template <typename> class Atom = std::atomic,
typename Allocator = folly::detail::MMapAlloc>
using AtomicUnorderedInsertMap64 =
AtomicUnorderedInsertMap<Key,
Value,
Hash,
KeyEqual,
SkipKeyValueDeletion,
Atom,
Allocator,
uint64_t>;
/// MutableAtom is a tiny wrapper that gives you the option of atomically
/// updating values inserted into an AtomicUnorderedInsertMap<K,
......
@@ -81,20 +81,38 @@ struct non_atomic {
bool is_lock_free() const {return true;}
};
template<
typename Key, typename Value, template<typename> class Atom = non_atomic>
using UnorderedInsertMap = AtomicUnorderedInsertMap<
Key,
Value,
std::hash<Key>,
std::equal_to<Key>,
(boost::has_trivial_destructor<Key>::value &&
boost::has_trivial_destructor<Value>::value),
Atom,
std::allocator<char>>;
TEST(AtomicUnorderedInsertMap, basic) {
AtomicUnorderedInsertMap<std::string,std::string> m(100);
template <typename Key,
typename Value,
typename IndexType,
template <typename> class Atom = std::atomic,
typename Allocator = std::allocator<char>>
using UIM =
AtomicUnorderedInsertMap<Key,
Value,
std::hash<Key>,
std::equal_to<Key>,
(boost::has_trivial_destructor<Key>::value &&
boost::has_trivial_destructor<Value>::value),
Atom,
Allocator,
IndexType>;
namespace {
template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
}
// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
UIM<std::string,
std::string,
TypeParam,
std::atomic,
folly::detail::MMapAlloc> m(100);
m.emplace("abc", "ABC");
EXPECT_TRUE(m.find("abc") != m.cend());
@@ -116,8 +134,8 @@ TEST(AtomicUnorderedInsertMap, basic) {
EXPECT_TRUE(a != b);
}
TEST(AtomicUnorderedInsertMap, value_mutation) {
AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
UIM<int, MutableAtom<int>, TypeParam> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
@@ -127,7 +145,7 @@ TEST(AtomicUnorderedInsertMap, value_mutation) {
}
TEST(UnorderedInsertMap, value_mutation) {
UnorderedInsertMap<int, MutableData<int>> m(100);
UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
@@ -137,6 +155,24 @@ TEST(UnorderedInsertMap, value_mutation) {
EXPECT_EQ(m.find(1)->second.data, 2);
}
// This test is too expensive to run automatically. On my dev server it
// takes about 10 minutes for dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
size_t capacity = 2000000000;
AtomicUnorderedInsertMap64<size_t,size_t> big(capacity);
for (size_t i = 0; i < capacity * 2; i += 2) {
big.emplace(i, i * 10);
}
for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
auto iter = big.find(i);
if ((i & 1) == 0 && i < capacity * 2) {
EXPECT_EQ(iter->second, i * 10);
} else {
EXPECT_TRUE(iter == big.cend());
}
}
}
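
A rough footprint estimate for this disabled test (an assumption based on the Slot layout above, not a figure from the commit):

    // Assuming a 32-byte Slot for IndexType = uint64_t (8-byte headAndState_,
    // 8-byte next_, 16 bytes of storage for the pair of size_t):
    //   slots   ≈ 2'000'000'000 / 0.8 + 128 ≈ 2.5e9
    //   backing ≈ 2.5e9 * 32 B              ≈ 80 GB of address space
    // MMapAlloc only commits pages on first write, which keeps this feasible.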
BENCHMARK(lookup_int_int_hit, iters) {
std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
@@ -283,7 +319,7 @@ BENCHMARK(std_map) {
}
BENCHMARK(atomic_fast_map) {
UnorderedInsertMap<long, long, std::atomic> m(10000);
UIM<long, long, uint32_t, std::atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
@@ -295,7 +331,7 @@ BENCHMARK(atomic_fast_map) {
}
BENCHMARK(fast_map) {
UnorderedInsertMap<long, long> m(10000);
UIM<long, long, uint32_t, non_atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
for (int i=0; i<10000; ++i) {
auto a = m.find(i);
folly::doNotOptimizeAway(&*a);
}
}
BENCHMARK(atomic_fast_map_64) {
UIM<long, long, uint64_t, std::atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
for (int i=0; i<10000; ++i) {
auto a = m.find(i);
folly::doNotOptimizeAway(&*a);
}
}
BENCHMARK(fast_map_64) {
UIM<long, long, uint64_t, non_atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
......