Commit 4a7ac66b authored by Maged Michael, committed by Facebook GitHub Bot

hazard pointers: Add microbenchmark of thread cache misses

Summary:
Add a microbenchmark for thread cache misses.

Add member function evict to hazptr_tc.

Add free function hazptr_tc_evict for use in testing and benchmarking only.

Remove the warning for thread cache overflow.

Add member function delete_hazard_pointers to hazptr_domain to be used only for testing and benchmarking.

## Microbenchmark Results

Results for TC hits and misses under an extreme case of 10000 in-use hazard pointers:
```
10x construct/destruct hazptr_array<9>            105 ns     97 ns     95 ns
1/1000 TC hit + miss & overflow                   550 ns    502 ns    468 ns
```

The latency for constructing/destroying a 9-hazard-pointer-array from TC hits is about 10 ns (the first row above reports the time for 10 such operations).

The latency for constructing/destroying two 9-hazard-pointer-arrays from one TC hit and one TC miss/overflow is about 500 us (i.e., 500,000 ns); the second row above reports 1/1000 of that latency, which is why it shows about 500 ns.

Reviewed By: yfeldblum

Differential Revision: D31102027

fbshipit-source-id: dee7333d77ebde25dcf18c6af72cc04e99788e43
parent 88afc741
......@@ -119,6 +119,9 @@ class hazptr_tc;
template <template <typename> class Atom = std::atomic>
hazptr_tc<Atom>& hazptr_tc_tls();
/** hazptr_tc_evict -- Used only for benchmarking */
void hazptr_tc_evict();
///
/// Hazard pointer domain
/// Defined in HazptrDomain.h
......
......@@ -183,6 +183,22 @@ class hazptr_domain {
wait_for_zero_bulk_reclaims(); // wait for concurrent bulk_reclaim-s
}
/**
 * delete_hazard_pointers -- Used only for benchmarking
 *
 * Destroys and deallocates every hazptr_rec reachable from head(),
 * then resets the list head and the hazard pointer count.
 */
void delete_hazard_pointers() {
  // Run cleanup() first so that no lagging concurrent asynchronous
  // reclamation is still in progress.
  cleanup();
  for (auto cur = head(); cur;) {
    // The successor must be read before the record is destroyed.
    auto following = cur->next();
    cur->~hazptr_rec<Atom>();
    hazptr_rec_alloc{}.deallocate(cur, 1);
    cur = following;
  }
  hazptrs_.store(nullptr);
  hcount_.store(0);
}
/** cleanup_cohort_tag */
void cleanup_cohort_tag(const hazptr_obj_cohort<Atom>* cohort) noexcept {
auto tag = reinterpret_cast<uintptr_t>(cohort) + kTagBit;
......
......@@ -91,6 +91,7 @@ class hazptr_tc {
friend hazptr_holder<Atom> make_hazard_pointer<Atom>(hazptr_domain<Atom>&);
template <uint8_t M, template <typename> class A>
friend hazptr_array<M, A> make_hazard_pointer_array();
friend void hazptr_tc_evict();
FOLLY_ALWAYS_INLINE
hazptr_tc_entry<Atom>& operator[](uint8_t i) noexcept {
......@@ -111,19 +112,9 @@ class hazptr_tc {
entry_[count_++].fill(hprec);
return true;
}
hazptr_warning_tc_overflow();
return false;
}
/**
 * hazptr_warning_tc_overflow -- Logs a rate-limited warning.
 *
 * A function-local static atomic counter is incremented on every call;
 * the warning is logged only when the pre-increment value is a
 * multiple of 10000.
 */
FOLLY_EXPORT FOLLY_NOINLINE void hazptr_warning_tc_overflow() {
  static std::atomic<uint64_t> warning_count{0};
  const uint64_t previous = warning_count.fetch_add(1);
  if (previous % 10000 != 0) {
    return;
  }
  LOG(WARNING) << "Hazptr thread cache overflow "
               << std::this_thread::get_id();
}
/** count -- Returns the current value of count_. */
FOLLY_ALWAYS_INLINE uint8_t count() const noexcept { return count_; }
/** set_count -- Overwrites count_ with val. */
FOLLY_ALWAYS_INLINE void set_count(uint8_t val) noexcept { count_ = val; }
......@@ -144,6 +135,8 @@ class hazptr_tc {
}
}
/** evict -- Calls evict(n) with n equal to the current count(). */
void evict() {
  const auto held = count();
  evict(held);
}
/** local -- Returns the value of local_. */
bool local() const noexcept { // for debugging only
return local_;
}
......@@ -160,6 +153,11 @@ FOLLY_ALWAYS_INLINE hazptr_tc<Atom>& hazptr_tc_tls() {
return folly::SingletonThreadLocal<hazptr_tc<Atom>, hazptr_tc_tls_tag>::get();
}
/**
 * hazptr_tc_evict -- Used only for benchmarking
 *
 * Calls evict() on the hazptr_tc returned by hazptr_tc_tls<>().
 */
inline void hazptr_tc_evict() {
  auto& tc = hazptr_tc_tls<>();
  tc.evict();
}
} // namespace folly
#endif // FOLLY_HAZPTR_THR_LOCAL
......@@ -50,6 +50,7 @@ using folly::hazptr_obj_cohort;
using folly::hazptr_retire;
using folly::hazptr_root;
using folly::hazptr_tc;
using folly::hazptr_tc_evict;
using folly::HazptrLockFreeLIFO;
using folly::HazptrSWMRSet;
using folly::HazptrWideCAS;
......@@ -1504,6 +1505,44 @@ uint64_t cohort_bench(std::string name, int nthreads) {
return bench(name, ops, repFn);
}
uint64_t tc_miss_bench(std::string name, int nthreads) {
hazptr_tc_evict();
hazard_pointer_default_domain<>().delete_hazard_pointers();
// Thread cache capacity
constexpr int C = hazptr_tc<>::capacity();
// Number of unavailable hazard pointers that will be at the head of
// the main list of hazard pointers before reaching available ones.
constexpr int N = 10000;
// Max number of threads
constexpr int P = 100;
hazard_pointer<> aa[N + 2 * C * P];
// The following creates N+2*C*P hazard pointers
for (int i = 0; i < N + 2 * C * P; ++i) {
aa[i] = make_hazard_pointer<>();
}
// Make the last 2*C*P in the domain's hazard pointer list available
for (int i = 0; i < 2 * C * P; ++i) {
aa[i] = hazard_pointer<>();
}
hazptr_tc_evict();
// The domain now has N unavailable hazard pointers at the head of
// the list following by C*P available ones.
auto repFn = [&] {
auto init = [] {};
auto fn = [&](int tid) {
for (int j = tid; j < ops / 1000; j += nthreads) {
// By using twice the TC capacity, each iteration does one
// filling and one eviction of the TC.
hazptr_array<C> a1 = make_hazard_pointer_array<C>();
hazptr_array<C> a2 = make_hazard_pointer_array<C>();
}
};
auto endFn = [] {};
return run_once(nthreads, init, fn, endFn);
};
return bench(name, ops, repFn);
}
// NOTE(review): presumably the thread counts each benchmark is run with
// (passed as nthreads) -- the loop over nthr is not visible here; confirm.
const int nthr[] = {1, 10};
// Values iterated as `j` inside benches(); their meaning is not visible here.
const int sizes[] = {10, 20};
......@@ -1526,6 +1565,10 @@ void benches() {
local_bench<2>("", i);
std::cout << "10x construct/destruct hazptr_local<3> ";
local_bench<3>("", i);
std::cout << "10x construct/destruct hazptr_array<9> ";
array_bench<9>("", i);
std::cout << "1/1000 TC hit + miss & overflow ";
tc_miss_bench("", i);
std::cout << "allocate/retire/reclaim object ";
obj_bench("", i);
for (int j : sizes) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment