Commit 53e6886f authored by Nathan Bronson, committed by Sara Golemon

Move AtomicUnorderedInsertMap to folly.

Summary: AtomicUnorderedInsertMap is a concurrent hash table that sits firmly
at the performance end of the generality <-> performance spectrum.
If you don't need updates (or can use your own concurrency control when
overwriting values), you never need to delete, and you can predict your
capacity perfectly, then you will get wait-free reads, lock-free inserts,
safe concurrent iteration, and excellent cache and performance outlier
behavior.  Arbitrary key and value types are supported.
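
A minimal usage sketch (based on the unit test added in this diff; the
header is folly/AtomicUnorderedMap.h, and example() is just an illustrative
wrapper, not part of the change):

  #include <folly/AtomicUnorderedMap.h>
  #include <string>

  void example() {
    // Capacity is chosen up front and never grows, per "predict your
    // capacity perfectly" above.
    folly::AtomicUnorderedInsertMap<std::string, std::string> m(100);
    m.emplace("abc", "ABC");   // lock-free insert
    auto it = m.find("abc");   // wait-free read
    if (it != m.cend()) {
      // it->first == "abc", it->second == "ABC"
    }
  }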

Reviewed By: @yfeldblum

Differential Revision: D2145281
parent fe6e73a6
(The diff for folly/AtomicUnorderedMap.h itself is collapsed and not shown here.)
@@ -28,6 +28,7 @@ nobase_follyinclude_HEADERS = \
 	AtomicHashMap-inl.h \
 	AtomicLinkedList.h \
 	AtomicStruct.h \
+	AtomicUnorderedMap.h \
 	Baton.h \
 	Benchmark.h \
 	Bits.h \
@@ -39,6 +40,7 @@ nobase_follyinclude_HEADERS = \
 	CpuId.h \
 	CPortability.h \
 	detail/AtomicHashUtils.h \
+	detail/AtomicUnorderedMapUtils.h \
 	detail/BitIteratorDetail.h \
 	detail/BitsDetail.h \
 	detail/CacheLocality.h \
...
#pragma once

#include <atomic>
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <system_error>
#include <type_traits>

namespace folly { namespace detail {

class MMapAlloc {
 private:
  // Round the requested size up to a whole number of pages.
  size_t computeSize(size_t size) {
    long pagesize = sysconf(_SC_PAGESIZE);
    size_t mmapLength = ((size - 1) & ~(pagesize - 1)) + pagesize;
    assert(size <= mmapLength && mmapLength < size + pagesize);
    assert((mmapLength % pagesize) == 0);
    return mmapLength;
  }

 public:
  void* allocate(size_t size) {
    auto len = computeSize(size);

    // MAP_HUGETLB is a perf win, but requires cooperation from the
    // deployment environment (and a change to computeSize()).
    void* mem = static_cast<void*>(mmap(
        nullptr,
        len,
        PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
        -1,
        0));
    if (mem == reinterpret_cast<void*>(-1)) {
      throw std::system_error(errno, std::system_category());
    }
    return mem;
  }

  void deallocate(void* p, size_t size) {
    auto len = computeSize(size);
    munmap(p, len);
  }
};

template<typename Allocator>
struct GivesZeroFilledMemory : public std::false_type {};

// Anonymous mmap-ed pages are zero-filled by the kernel, so memory obtained
// from MMapAlloc does not need to be zeroed again by its consumer.
template<>
struct GivesZeroFilledMemory<MMapAlloc> : public std::true_type {};

}}
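
For reference, MMapAlloc implements the minimal allocate/deallocate-with-size
protocol shown above, and the GivesZeroFilledMemory specialization records that
anonymous mmap pages arrive already zero-filled, which presumably lets the map
skip zero-initializing new storage. A small hedged exercise of the allocator
(demo() is illustrative only, not part of this diff):

  #include <folly/detail/AtomicUnorderedMapUtils.h>

  void demo() {
    folly::detail::MMapAlloc alloc;
    void* p = alloc.allocate(1 << 20);   // length is rounded up to whole pages
    alloc.deallocate(p, 1 << 20);        // pass the same size so the munmap
                                         // length matches the mmap length
  }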
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/AtomicUnorderedMap.h>

#include <semaphore.h>

#include <thread>
#include <unordered_map>

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include <folly/Benchmark.h>
#include <folly/Random.h>
#include <folly/test/DeterministicSchedule.h>
using namespace folly;
using namespace folly::test;
// A drop-in stand-in for std::atomic<T> that performs the same operations
// non-atomically.  The single-threaded benchmarks below use it to measure
// how much the atomic operations cost in the uncontended case.
template<class T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) { value += arg; return load(); }

  T load(std::memory_order order = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */
  operator T() const { return load(); }

  void store(T desired, std::memory_order order = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  bool compare_exchange_weak(
      T& expected, T desired,
      std::memory_order success = std::memory_order_seq_cst,
      std::memory_order failure = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected, T desired,
      std::memory_order success = std::memory_order_seq_cst,
      std::memory_order failure = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool is_lock_free() const { return true; }
};
// Alias that defaults Atom to non_atomic, so the same map can be instantiated
// with plain operations (fast_map) or std::atomic operations (atomic_fast_map).
template<
    typename Key, typename Value, template<typename> class Atom = non_atomic>
using UnorderedInsertMap = AtomicUnorderedInsertMap<
    Key,
    Value,
    std::hash<Key>,
    std::equal_to<Key>,
    (boost::has_trivial_destructor<Key>::value &&
     boost::has_trivial_destructor<Value>::value),
    Atom,
    std::allocator<char>>;
TEST(AtomicUnorderedInsertMap, basic) {
AtomicUnorderedInsertMap<std::string,std::string> m(100);
m.emplace("abc", "ABC");
EXPECT_TRUE(m.find("abc") != m.cend());
EXPECT_EQ(m.find("abc")->first, "abc");
EXPECT_EQ(m.find("abc")->second, "ABC");
EXPECT_TRUE(m.find("def") == m.cend());
auto iter = m.cbegin();
EXPECT_TRUE(iter != m.cend());
EXPECT_TRUE(iter == m.find("abc"));
auto a = iter;
EXPECT_TRUE(a == iter);
auto b = iter;
++iter;
EXPECT_TRUE(iter == m.cend());
EXPECT_TRUE(a == b);
EXPECT_TRUE(a != iter);
a++;
EXPECT_TRUE(a == iter);
EXPECT_TRUE(a != b);
}
TEST(AtomicUnorderedInsertMap, value_mutation) {
AtomicUnorderedInsertMap<int, MutableAtom<int>> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
}
m.find(1)->second.data++;
}
TEST(UnorderedInsertMap, value_mutation) {
UnorderedInsertMap<int, MutableData<int>> m(100);
for (int i = 0; i < 50; ++i) {
m.emplace(i, i);
}
m.find(1)->second.data++;
EXPECT_EQ(m.find(1)->second.data, 2);
}
BENCHMARK(lookup_int_int_hit, iters) {
std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
size_t capacity = 100000;
BENCHMARK_SUSPEND {
ptr.reset(new AtomicUnorderedInsertMap<int,size_t>(capacity));
for (size_t i = 0; i < capacity; ++i) {
auto k = 3 * ((5641 * i) % capacity);
ptr->emplace(k, k + 1);
EXPECT_EQ(ptr->find(k)->second, k + 1);
}
}
for (size_t i = 0; i < iters; ++i) {
size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
auto iter = ptr->find(k);
if (iter == ptr->cend() ||
iter->second != k + 1) {
auto jter = ptr->find(k);
EXPECT_TRUE(iter == jter);
}
EXPECT_EQ(iter->second, k + 1);
}
BENCHMARK_SUSPEND {
ptr.reset(nullptr);
}
}
struct PairHash {
size_t operator()(const std::pair<uint64_t,uint64_t>& pr) const {
return pr.first ^ pr.second;
}
};
void contendedRW(size_t itersPerThread,
size_t capacity,
size_t numThreads,
size_t readsPerWrite) {
typedef std::pair<uint64_t,uint64_t> Key;
typedef AtomicUnorderedInsertMap<Key,MutableAtom<uint32_t>,PairHash> Map;
std::unique_ptr<Map> ptr = {};
std::atomic<bool> go(false);
std::vector<std::thread> threads;
BENCHMARK_SUSPEND {
ptr.reset(new Map(capacity));
while (threads.size() < numThreads) {
threads.emplace_back([&](){
while (!go) {
std::this_thread::yield();
}
size_t reads = 0;
size_t writes = 0;
while (reads + writes < itersPerThread) {
auto r = Random::rand32();
Key key(reads + writes, r);
if (reads < writes * readsPerWrite ||
writes >= capacity / numThreads) {
// read needed
++reads;
auto iter = ptr->find(key);
EXPECT_TRUE(
iter == ptr->cend() ||
iter->second.data.load(std::memory_order_acquire) >= key.first);
} else {
++writes;
try {
auto pr = ptr->emplace(key, key.first);
if (!pr.second) {
pr.first->second.data++;
}
} catch (std::bad_alloc& x) {
LOG(INFO) << "bad alloc";
}
}
}
});
}
}
go = true;
for (auto& thr : threads) {
thr.join();
}
BENCHMARK_SUSPEND {
ptr.reset(nullptr);
}
}
// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpp   relative  time/iter  iters/s
// ============================================================================
// lookup_int_int_hit 20.05ns 49.89M
// contendedRW(small_32thr_99pct) 70.36ns 14.21M
// contendedRW(large_32thr_99pct) 164.23ns 6.09M
// contendedRW(large_32thr_99_9pct) 158.81ns 6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit 19.67ns 50.84M
// contendedRW(small_32thr_99pct) 62.46ns 16.01M
// contendedRW(large_32thr_99pct) 119.41ns 8.37M
// contendedRW(large_32thr_99_9pct) 111.23ns 8.99M
// ============================================================================
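// A hedged sketch (not part of this diff) of what "MAP_HUGETLB hacked in"
// could look like in MMapAlloc::allocate(): round the length up to the huge
// page size and add MAP_HUGETLB to the mmap flags.  The 2 MiB kHugePageSize
// is an assumption (the common x86_64 default), and the call fails unless
// the environment has hugetlb pages reserved:
//
//   size_t computeHugeSize(size_t size) {
//     constexpr size_t kHugePageSize = 2 * 1024 * 1024;
//     return ((size - 1) & ~(kHugePageSize - 1)) + kHugePageSize;
//   }
//   void* mem = mmap(nullptr, computeHugeSize(size), PROT_READ | PROT_WRITE,
//                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_HUGETLB,
//                    -1, 0);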
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
BENCHMARK_DRAW_LINE();
// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
// Single-threaded benchmarks to test how much better we are than
// std::unordered_map, and what the cost of using atomic operations is
// in the uncontended use case.
// ============================================================================
// std_map 1.20ms 832.58
// atomic_fast_map 511.35us 1.96K
// fast_map 196.28us 5.09K
// ============================================================================
BENCHMARK(std_map) {
std::unordered_map<long, long> m;
m.reserve(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
for (int i=0; i<10000; ++i) {
auto a = m.find(i);
folly::doNotOptimizeAway(&*a);
}
}
BENCHMARK(atomic_fast_map) {
UnorderedInsertMap<long, long, std::atomic> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
for (int i=0; i<10000; ++i) {
auto a = m.find(i);
folly::doNotOptimizeAway(&*a);
}
}
BENCHMARK(fast_map) {
UnorderedInsertMap<long, long> m(10000);
for (int i=0; i<10000; ++i) {
m.emplace(i,i);
}
for (int i=0; i<10000; ++i) {
auto a = m.find(i);
folly::doNotOptimizeAway(&*a);
}
}
int main(int argc, char ** argv) {
testing::InitGoogleTest(&argc, argv);
google::ParseCommandLineFlags(&argc, &argv, true);
int rv = RUN_ALL_TESTS();
folly::runBenchmarksOnFlag();
return rv;
}