Commit 0e92d3c2 authored by Maged Michael, committed by Facebook GitHub Bot

hazard pointers: Optimize allocation of hazard pointers from domain

Summary:
Optimize the management of available hazard pointers in the domain (those not held in thread caches) by adding a linked list of available hazard pointers, with a lock bit packed into the pointer to the head of the list. Pop operations take two atomic steps: (1) set the lock bit, (2) pop one or more hazard pointers and clear the lock bit. Push operations take one atomic step, but can proceed only while the lock bit is clear.
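
To make the scheme concrete, here is a minimal sketch of the packed lock-bit protocol, assuming aligned nodes so that bit 0 of the head word is free. `Node`, `avail`, `pop_avail`, and `push_avail` are illustrative stand-ins, not the folly implementation (the real code pops batches of `hazptr_rec`s; see `try_pop_available_hprecs` and `push_available_hprecs` in the diff below):

```cpp
#include <atomic>
#include <cstdint>
#include <thread>

// Illustrative stand-in for hazptr_rec.
struct Node {
  Node* next{nullptr};
};

constexpr uintptr_t kLockBit = 1; // nodes are aligned, so bit 0 is free
std::atomic<uintptr_t> avail{0}; // head-of-list pointer | lock bit

// Pop in two atomic steps: (1) a CAS that sets the lock bit,
// (2) a store that installs the new head and clears the lock bit.
Node* pop_avail() {
  while (true) {
    uintptr_t head = avail.load(std::memory_order_acquire);
    if (head == 0) {
      return nullptr; // list is empty
    }
    if ((head & kLockBit) == 0 &&
        avail.compare_exchange_weak(
            head, head | kLockBit, std::memory_order_acq_rel)) {
      // Lock bit set: no concurrent pop or push can touch the list now,
      // so reading node->next is race-free.
      Node* node = reinterpret_cast<Node*>(head);
      // Step 2: publish the successor; this also clears the lock bit.
      avail.store(
          reinterpret_cast<uintptr_t>(node->next), std::memory_order_release);
      node->next = nullptr;
      return node;
    }
    std::this_thread::yield(); // list locked or CAS lost a race; retry
  }
}

// Push in one atomic step, but only while the lock bit is clear, so an
// in-progress pop never sees the list change between its two steps.
void push_avail(Node* node) {
  while (true) {
    uintptr_t head = avail.load(std::memory_order_acquire);
    if ((head & kLockBit) == 0) {
      node->next = reinterpret_cast<Node*>(head);
      if (avail.compare_exchange_weak(
              head, reinterpret_cast<uintptr_t>(node),
              std::memory_order_acq_rel)) {
        return;
      }
    } else {
      std::this_thread::yield();
    }
  }
}
```

Compared with the previous scheme, which scanned the entire hazard pointer list CASing per-record active bits, both pop and push here operate only on the list head.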

Microbenchmark results (in a process with 10K+ hazard pointers) show a reduction in the latency of constructing and destroying two 9-hazard-pointer arrays, which involves one TC hit and one TC miss/overflow, from hundreds of microseconds to tens of nanoseconds.
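
For reference, the pattern being timed (per `tc_miss_bench` below) is roughly the following; the arrays are presumably sized at the thread cache's capacity of 9, so constructing two back to back uses twice the cache capacity:

```cpp
// One benchmark iteration: a1 fills from the thread cache (TC hit);
// a2 misses and falls through to the domain; destroying both overflows
// the cache, pushing the excess records back to the domain.
folly::hazptr_array<9> a1 = folly::make_hazard_pointer_array<9>();
folly::hazptr_array<9> a2 = folly::make_hazard_pointer_array<9>();
```

Note that the "Before" row is labeled "1/1000" because the old benchmark loop ran only 1/1000 of the operations while still dividing by the full count, so its reported ~550 ns corresponds to roughly 550 us per iteration, consistent with the "hundreds of microseconds" claim. The commit also fixes the benchmark to run at full scale (see the `ops / 1000` change below).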

Before:
```
1/1000 TC hit + miss & overflow                   550 ns    502 ns    468 ns
```

After:
```
TC hit + miss & overflow                           49 ns     48 ns     48 ns
```

Reviewed By: yfeldblum

Differential Revision: D31102053

fbshipit-source-id: ae923bf3b05676e7572cca35179531e947846300
parent 4a7ac66b
@@ -95,6 +95,7 @@ constexpr int hazptr_domain_rcount_threshold() {
 template <template <typename> class Atom>
 class hazptr_domain {
   using Obj = hazptr_obj<Atom>;
+  using Rec = hazptr_rec<Atom>;
   using List = hazptr_detail::linked_list<Obj>;
   using ObjList = hazptr_obj_list<Atom>;
   using RetiredList = hazptr_detail::shared_head_only_list<Obj, Atom>;
@@ -106,6 +107,7 @@ class hazptr_domain {
   static constexpr int kListTooLarge = 100000;
   static constexpr uint64_t kSyncTimePeriod{2000000000}; // nanoseconds
   static constexpr uintptr_t kTagBit = hazptr_obj<Atom>::kTagBit;
+  static constexpr uintptr_t kLockBit = 1;
   static constexpr int kIgnoredLowBits = 8;
   static constexpr int kNumShards = 8;
@@ -117,7 +119,8 @@
     return &folly::QueuedImmediateExecutor::instance();
   }
-  Atom<hazptr_rec<Atom>*> hazptrs_{nullptr};
+  Atom<Rec*> hazptrs_{nullptr};
+  Atom<uintptr_t> avail_{reinterpret_cast<uintptr_t>(nullptr)};
   Atom<uint64_t> sync_time_{0};
   /* Using signed int for rcount_ because it may transiently be negative.
      Using signed int for all integer variables that may be involved in
@@ -188,15 +191,16 @@
     // Call cleanup() to ensure that there is no lagging concurrent
     // asynchronous reclamation in progress.
     cleanup();
-    auto rec = head();
+    Rec* rec = head();
     while (rec) {
       auto next = rec->next();
-      rec->~hazptr_rec<Atom>();
+      rec->~Rec();
       hazptr_rec_alloc{}.deallocate(rec, 1);
       rec = next;
     }
     hazptrs_.store(nullptr);
     hcount_.store(0);
+    avail_.store(reinterpret_cast<uintptr_t>(nullptr));
   }

   /** cleanup_cohort_tag */
@@ -224,13 +228,13 @@
   }

 private:
-  using hazptr_rec_alloc = AlignedSysAllocator<
-      hazptr_rec<Atom>,
-      FixedAlign<alignof(hazptr_rec<Atom>)>>;
+  using hazptr_rec_alloc = AlignedSysAllocator<Rec, FixedAlign<alignof(Rec)>>;

   friend void hazptr_domain_push_retired<Atom>(
       hazptr_obj_list<Atom>&, hazptr_domain<Atom>&) noexcept;
   friend hazptr_holder<Atom> make_hazard_pointer<Atom>(hazptr_domain<Atom>&);
   template <uint8_t M, template <typename> class A>
   friend hazptr_array<M, A> make_hazard_pointer_array();
+  friend class hazptr_holder<Atom>;
   friend class hazptr_obj<Atom>;
   friend class hazptr_obj_cohort<Atom>;
@@ -278,14 +282,48 @@ class hazptr_domain {
     num_bulk_reclaims_.fetch_sub(1, std::memory_order_release);
   }

-  /** hprec_acquire */
-  hazptr_rec<Atom>* hprec_acquire() {
-    auto rec = try_acquire_existing_hprec();
-    return rec != nullptr ? rec : acquire_new_hprec();
-  }
+  uintptr_t load_avail() { return avail_.load(std::memory_order_acquire); }
+
+  void store_avail(uintptr_t val) {
+    avail_.store(val, std::memory_order_release);
+  }
+
+  bool cas_avail(uintptr_t& expval, uintptr_t newval) {
+    return avail_.compare_exchange_weak(
+        expval, newval, std::memory_order_acq_rel, std::memory_order_acquire);
+  }
+
+  /** acquire_hprecs */
+  Rec* acquire_hprecs(uint8_t num) {
+    DCHECK_GE(num, 1);
+    // C++17: auto [n, head] = try_pop_available_hprecs(num);
+    uint8_t n;
+    Rec* head;
+    std::tie(n, head) = try_pop_available_hprecs(num);
+    for (; n < num; ++n) {
+      Rec* rec = create_new_hprec();
+      DCHECK(rec->next_avail() == nullptr);
+      rec->set_next_avail(head);
+      head = rec;
+    }
+    DCHECK(head);
+    return head;
+  }
+
+  /** release_hprec */
+  void release_hprec(Rec* hprec) noexcept {
+    DCHECK(hprec);
+    DCHECK(hprec->next_avail() == nullptr);
+    push_available_hprecs(hprec, hprec);
+  }

-  /** hprec_release */
-  void hprec_release(hazptr_rec<Atom>* hprec) noexcept { hprec->release(); }
+  /** release_hprecs */
+  void release_hprecs(Rec* head, Rec* tail) noexcept {
+    DCHECK(head);
+    DCHECK(tail);
+    DCHECK(tail->next_avail() == nullptr);
+    push_available_hprecs(head, tail);
+  }

   /** push_list */
   void push_list(ObjList& l) {
@@ -523,7 +561,7 @@
     }
   }

-  hazptr_rec<Atom>* head() const noexcept {
+  Rec* head() const noexcept {
     return hazptrs_.load(std::memory_order_acquire);
   }
@@ -555,8 +593,7 @@
     auto rec = head();
     while (rec) {
       auto next = rec->next();
-      DCHECK(!rec->active());
-      rec->~hazptr_rec<Atom>();
+      rec->~Rec();
       hazptr_rec_alloc{}.deallocate(rec, 1);
       rec = next;
     }
@@ -568,22 +605,92 @@
     }
   }

-  hazptr_rec<Atom>* try_acquire_existing_hprec() {
-    auto rec = head();
-    while (rec) {
-      auto next = rec->next();
-      if (rec->try_acquire()) {
-        return rec;
-      }
-      rec = next;
-    }
-    return nullptr;
-  }
+  std::pair<uint8_t, Rec*> try_pop_available_hprecs(uint8_t num) {
+    DCHECK_GE(num, 1);
+    while (true) {
+      uintptr_t avail = load_avail();
+      if (avail == reinterpret_cast<uintptr_t>(nullptr)) {
+        return {0, nullptr};
+      }
+      if ((avail & kLockBit) == 0) {
+        // Try to lock avail list
+        if (cas_avail(avail, avail | kLockBit)) {
+          // Lock acquired
+          Rec* head = reinterpret_cast<Rec*>(avail);
+          uint8_t nn = pop_available_hprecs_release_lock(num, head);
+          // Lock released
+          DCHECK_GE(nn, 1);
+          DCHECK_LE(nn, num);
+          return {nn, head};
+        }
+      } else {
+        std::this_thread::yield();
+      }
+    }
+  }
+
+  uint8_t pop_available_hprecs_release_lock(uint8_t num, Rec* head) {
+    // Lock already acquired
+    DCHECK_GE(num, 1);
+    DCHECK(head);
+    Rec* tail = head;
+    uint8_t nn = 1;
+    Rec* next = tail->next_avail();
+    while ((next != nullptr) && (nn < num)) {
+      DCHECK_EQ(reinterpret_cast<uintptr_t>(next) & kLockBit, 0);
+      tail = next;
+      next = tail->next_avail();
+      ++nn;
+    }
+    uintptr_t newval = reinterpret_cast<uintptr_t>(next);
+    DCHECK_EQ(newval & kLockBit, 0);
+    // Release lock
+    store_avail(newval);
+    tail->set_next_avail(nullptr);
+    return nn;
+  }
+
+  void push_available_hprecs(Rec* head, Rec* tail) {
+    DCHECK(head);
+    DCHECK(tail);
+    DCHECK(tail->next_avail() == nullptr);
+    if (kIsDebug) {
+      dcheck_connected(head, tail);
+    }
+    uintptr_t newval = reinterpret_cast<uintptr_t>(head);
+    DCHECK_EQ(newval & kLockBit, 0);
+    while (true) {
+      uintptr_t avail = load_avail();
+      if ((avail & kLockBit) == 0) {
+        // Try to push if unlocked
+        auto next = reinterpret_cast<Rec*>(avail);
+        tail->set_next_avail(next);
+        if (cas_avail(avail, newval)) {
+          break;
+        }
+      } else {
+        std::this_thread::yield();
+      }
+    }
+  }
+
+  void dcheck_connected(Rec* head, Rec* tail) {
+    Rec* rec = head;
+    bool connected = false;
+    while (rec) {
+      Rec* next = rec->next_avail();
+      if (rec == tail) {
+        connected = true;
+        DCHECK(next == nullptr);
+      }
+      rec = next;
+    }
+    DCHECK(connected);
+  }

-  hazptr_rec<Atom>* acquire_new_hprec() {
+  Rec* create_new_hprec() {
     auto rec = hazptr_rec_alloc{}.allocate(1);
-    new (rec) hazptr_rec<Atom>();
-    rec->set_active();
+    new (rec) Rec();
     rec->set_domain(this);
     while (true) {
       auto h = head();
@@ -89,7 +89,7 @@ class hazptr_holder {
       }
     }
 #endif
-      domain->hprec_release(hprec_);
+      domain->release_hprec(hprec_);
     }
   }
@@ -189,7 +189,9 @@ FOLLY_ALWAYS_INLINE hazptr_holder<Atom> make_hazard_pointer(
     }
   }
 #endif
-  auto hprec = domain.hprec_acquire();
+  auto hprec = domain.acquire_hprecs(1);
+  DCHECK(hprec);
+  DCHECK(hprec->next_avail() == nullptr);
   return hazptr_holder<Atom>(hprec);
 }
@@ -331,9 +333,15 @@ FOLLY_ALWAYS_INLINE hazptr_array<M, Atom> make_hazard_pointer_array() {
   }
   tc.set_count(offset);
 #else
+  auto hprec = hazard_pointer_default_domain<Atom>().acquire_hprecs(M);
   for (uint8_t i = 0; i < M; ++i) {
-    new (&h[i]) hazptr_holder<Atom>(make_hazard_pointer<Atom>());
+    DCHECK(hprec);
+    auto next = hprec->next_avail();
+    hprec->set_next_avail(nullptr);
+    new (&h[i]) hazptr_holder<Atom>(hprec);
+    hprec = next;
   }
+  DCHECK(hprec == nullptr);
 #endif
   a.empty_ = false;
   return a;
@@ -32,12 +32,17 @@ template <template <typename> class Atom>
 class alignas(hardware_destructive_interference_size) hazptr_rec {
   Atom<const void*> hazptr_{nullptr}; // the hazard pointer
   hazptr_domain<Atom>* domain_;
-  hazptr_rec* next_;
-  Atom<bool> active_{false};
+  hazptr_rec* next_; // Next in the main hazard pointer list. Immutable.
+  hazptr_rec* nextAvail_{nullptr}; // Next available hazard pointer.

   friend class hazptr_domain<Atom>;
   friend class hazptr_holder<Atom>;
   friend class hazptr_tc_entry<Atom>;
 #if FOLLY_HAZPTR_THR_LOCAL
   friend class hazptr_tc<Atom>;
 #endif
+  friend hazptr_holder<Atom> make_hazard_pointer<Atom>(hazptr_domain<Atom>&);
+  template <uint8_t M, template <typename> class A>
+  friend hazptr_array<M, A> make_hazard_pointer_array();

   const void* hazptr() const noexcept {
     return hazptr_.load(std::memory_order_acquire);
@@ -47,25 +52,14 @@ class alignas(hardware_destructive_interference_size) hazptr_rec {
     hazptr_.store(p, std::memory_order_release);
   }

-  bool active() const noexcept {
-    return active_.load(std::memory_order_acquire);
-  }
-
-  void set_active() noexcept { active_.store(true, std::memory_order_relaxed); }
-
-  bool try_acquire() noexcept {
-    bool a = active();
-    return !a &&
-        active_.compare_exchange_strong(
-            a, true, std::memory_order_release, std::memory_order_relaxed);
-  }
-
-  void release() noexcept { active_.store(false, std::memory_order_release); }
-
   hazptr_rec<Atom>* next() { return next_; }
+  hazptr_rec<Atom>* next_avail() { return nextAvail_; }

   void set_next(hazptr_rec<Atom>* rec) { next_ = rec; }
+  void set_next_avail(hazptr_rec<Atom>* rec) { nextAvail_ = rec; }

   FOLLY_ALWAYS_INLINE hazptr_domain<Atom>* domain() { return domain_; }

   void set_domain(hazptr_domain<Atom>* dom) { domain_ = dom; }
@@ -56,8 +56,6 @@ class hazptr_tc_entry {
   }

   FOLLY_ALWAYS_INLINE hazptr_rec<Atom>* get() const noexcept { return hprec_; }

-  void evict() { hprec_->release(); }
-
 }; // hazptr_tc_entry

 /**
@@ -74,15 +72,13 @@ class hazptr_tc {
   bool local_{false}; // for debug mode only

 public:
-  ~hazptr_tc() {
-    for (uint8_t i = 0; i < count(); ++i) {
-      entry_[i].evict();
-    }
-  }
+  ~hazptr_tc() { evict(count()); }

   static constexpr uint8_t capacity() noexcept { return kCapacity; }

 private:
+  using Rec = hazptr_rec<Atom>;
+
   template <uint8_t, template <typename> class>
   friend class hazptr_array;
   friend class hazptr_holder<Atom>;
@@ -122,17 +118,37 @@
   FOLLY_NOINLINE void fill(uint8_t num) {
     DCHECK_LE(count_ + num, capacity());
     auto& domain = default_hazptr_domain<Atom>();
+    Rec* hprec = domain.acquire_hprecs(num);
     for (uint8_t i = 0; i < num; ++i) {
-      auto hprec = domain.hprec_acquire();
+      DCHECK(hprec);
+      Rec* next = hprec->next_avail();
+      hprec->set_next_avail(nullptr);
       entry_[count_++].fill(hprec);
+      hprec = next;
     }
+    DCHECK(hprec == nullptr);
   }

   FOLLY_NOINLINE void evict(uint8_t num) {
     DCHECK_GE(count_, num);
+    if (num == 0) {
+      return;
+    }
+    Rec* head = nullptr;
+    Rec* tail = nullptr;
     for (uint8_t i = 0; i < num; ++i) {
-      entry_[--count_].evict();
+      Rec* rec = entry_[--count_].get();
+      DCHECK(rec);
+      rec->set_next_avail(head);
+      head = rec;
+      if (!tail) {
+        tail = rec;
+      }
     }
+    DCHECK(head);
+    DCHECK(tail);
+    DCHECK(tail->next_avail() == nullptr);
+    hazard_pointer_default_domain<Atom>().release_hprecs(head, tail);
   }
+
+  void evict() { evict(count()); }
@@ -1530,7 +1530,7 @@ uint64_t tc_miss_bench(std::string name, int nthreads) {
   auto repFn = [&] {
     auto init = [] {};
     auto fn = [&](int tid) {
-      for (int j = tid; j < ops / 1000; j += nthreads) {
+      for (int j = tid; j < ops; j += nthreads) {
         // By using twice the TC capacity, each iteration does one
         // filling and one eviction of the TC.
         hazptr_array<C> a1 = make_hazard_pointer_array<C>();
@@ -1567,7 +1567,7 @@ void benches() {
     local_bench<3>("", i);
     std::cout << "10x construct/destruct hazptr_array<9> ";
     array_bench<9>("", i);
-    std::cout << "1/1000 TC hit + miss & overflow ";
+    std::cout << "TC hit + miss & overflow ";
     tc_miss_bench("", i);
     std::cout << "allocate/retire/reclaim object ";
     obj_bench("", i);