Commit 0e92d3c2 authored by Maged Michael's avatar Maged Michael Committed by Facebook GitHub Bot

hazard pointers: Optimize allocation of hazard pointers from domain

Summary:
Optimize the management of available hazard pointers in the domain that are not in thread caches, by adding a linked list of available hazard pointers with a lock bit packed with the pointer to the head of the list. Pop operations are done in two atomic steps: (1) set lock bit, (2) pop hazard pointers and clear lock bit. Push operations are done in one atomic step, but can proceed only when the lock bit is clear.

Microbenchmark results (in a process with 10K+ hazard pointers) show reduction in the latency of constructing and destroying two 9-hazard-pointer arrays — one involving a TC hit and the other a TC miss/overflow — from hundreds of microseconds to tens of nanoseconds.

Before:
```
1/1000 TC hit + miss & overflow                   550 ns    502 ns    468 ns
```

After:
```
TC hit + miss & overflow                           49 ns     48 ns     48 ns
```

Reviewed By: yfeldblum

Differential Revision: D31102053

fbshipit-source-id: ae923bf3b05676e7572cca35179531e947846300
parent 4a7ac66b
...@@ -95,6 +95,7 @@ constexpr int hazptr_domain_rcount_threshold() { ...@@ -95,6 +95,7 @@ constexpr int hazptr_domain_rcount_threshold() {
template <template <typename> class Atom> template <template <typename> class Atom>
class hazptr_domain { class hazptr_domain {
using Obj = hazptr_obj<Atom>; using Obj = hazptr_obj<Atom>;
using Rec = hazptr_rec<Atom>;
using List = hazptr_detail::linked_list<Obj>; using List = hazptr_detail::linked_list<Obj>;
using ObjList = hazptr_obj_list<Atom>; using ObjList = hazptr_obj_list<Atom>;
using RetiredList = hazptr_detail::shared_head_only_list<Obj, Atom>; using RetiredList = hazptr_detail::shared_head_only_list<Obj, Atom>;
...@@ -106,6 +107,7 @@ class hazptr_domain { ...@@ -106,6 +107,7 @@ class hazptr_domain {
static constexpr int kListTooLarge = 100000; static constexpr int kListTooLarge = 100000;
static constexpr uint64_t kSyncTimePeriod{2000000000}; // nanoseconds static constexpr uint64_t kSyncTimePeriod{2000000000}; // nanoseconds
static constexpr uintptr_t kTagBit = hazptr_obj<Atom>::kTagBit; static constexpr uintptr_t kTagBit = hazptr_obj<Atom>::kTagBit;
static constexpr uintptr_t kLockBit = 1;
static constexpr int kIgnoredLowBits = 8; static constexpr int kIgnoredLowBits = 8;
static constexpr int kNumShards = 8; static constexpr int kNumShards = 8;
...@@ -117,7 +119,8 @@ class hazptr_domain { ...@@ -117,7 +119,8 @@ class hazptr_domain {
return &folly::QueuedImmediateExecutor::instance(); return &folly::QueuedImmediateExecutor::instance();
} }
Atom<hazptr_rec<Atom>*> hazptrs_{nullptr}; Atom<Rec*> hazptrs_{nullptr};
Atom<uintptr_t> avail_{reinterpret_cast<uintptr_t>(nullptr)};
Atom<uint64_t> sync_time_{0}; Atom<uint64_t> sync_time_{0};
/* Using signed int for rcount_ because it may transiently be negative. /* Using signed int for rcount_ because it may transiently be negative.
Using signed int for all integer variables that may be involved in Using signed int for all integer variables that may be involved in
...@@ -188,15 +191,16 @@ class hazptr_domain { ...@@ -188,15 +191,16 @@ class hazptr_domain {
// Call cleanup() to ensure that there is no lagging concurrent // Call cleanup() to ensure that there is no lagging concurrent
// asynchronous reclamation in progress. // asynchronous reclamation in progress.
cleanup(); cleanup();
auto rec = head(); Rec* rec = head();
while (rec) { while (rec) {
auto next = rec->next(); auto next = rec->next();
rec->~hazptr_rec<Atom>(); rec->~Rec();
hazptr_rec_alloc{}.deallocate(rec, 1); hazptr_rec_alloc{}.deallocate(rec, 1);
rec = next; rec = next;
} }
hazptrs_.store(nullptr); hazptrs_.store(nullptr);
hcount_.store(0); hcount_.store(0);
avail_.store(reinterpret_cast<uintptr_t>(nullptr));
} }
/** cleanup_cohort_tag */ /** cleanup_cohort_tag */
...@@ -224,13 +228,13 @@ class hazptr_domain { ...@@ -224,13 +228,13 @@ class hazptr_domain {
} }
private: private:
using hazptr_rec_alloc = AlignedSysAllocator< using hazptr_rec_alloc = AlignedSysAllocator<Rec, FixedAlign<alignof(Rec)>>;
hazptr_rec<Atom>,
FixedAlign<alignof(hazptr_rec<Atom>)>>;
friend void hazptr_domain_push_retired<Atom>( friend void hazptr_domain_push_retired<Atom>(
hazptr_obj_list<Atom>&, hazptr_domain<Atom>&) noexcept; hazptr_obj_list<Atom>&, hazptr_domain<Atom>&) noexcept;
friend hazptr_holder<Atom> make_hazard_pointer<Atom>(hazptr_domain<Atom>&); friend hazptr_holder<Atom> make_hazard_pointer<Atom>(hazptr_domain<Atom>&);
template <uint8_t M, template <typename> class A>
friend hazptr_array<M, A> make_hazard_pointer_array();
friend class hazptr_holder<Atom>; friend class hazptr_holder<Atom>;
friend class hazptr_obj<Atom>; friend class hazptr_obj<Atom>;
friend class hazptr_obj_cohort<Atom>; friend class hazptr_obj_cohort<Atom>;
...@@ -278,14 +282,48 @@ class hazptr_domain { ...@@ -278,14 +282,48 @@ class hazptr_domain {
num_bulk_reclaims_.fetch_sub(1, std::memory_order_release); num_bulk_reclaims_.fetch_sub(1, std::memory_order_release);
} }
/** hprec_acquire */ uintptr_t load_avail() { return avail_.load(std::memory_order_acquire); }
hazptr_rec<Atom>* hprec_acquire() {
auto rec = try_acquire_existing_hprec(); void store_avail(uintptr_t val) {
return rec != nullptr ? rec : acquire_new_hprec(); avail_.store(val, std::memory_order_release);
} }
/** hprec_release */ bool cas_avail(uintptr_t& expval, uintptr_t newval) {
void hprec_release(hazptr_rec<Atom>* hprec) noexcept { hprec->release(); } return avail_.compare_exchange_weak(
expval, newval, std::memory_order_acq_rel, std::memory_order_acquire);
}
/** acquire_hprecs */
Rec* acquire_hprecs(uint8_t num) {
DCHECK_GE(num, 1);
// C++17: auto [n, head] = try_pop_available_hprecs(num);
uint8_t n;
Rec* head;
std::tie(n, head) = try_pop_available_hprecs(num);
for (; n < num; ++n) {
Rec* rec = create_new_hprec();
DCHECK(rec->next_avail() == nullptr);
rec->set_next_avail(head);
head = rec;
}
DCHECK(head);
return head;
}
/** release_hprec */
void release_hprec(Rec* hprec) noexcept {
DCHECK(hprec);
DCHECK(hprec->next_avail() == nullptr);
push_available_hprecs(hprec, hprec);
}
/** release_hprecs */
void release_hprecs(Rec* head, Rec* tail) noexcept {
DCHECK(head);
DCHECK(tail);
DCHECK(tail->next_avail() == nullptr);
push_available_hprecs(head, tail);
}
/** push_list */ /** push_list */
void push_list(ObjList& l) { void push_list(ObjList& l) {
...@@ -523,7 +561,7 @@ class hazptr_domain { ...@@ -523,7 +561,7 @@ class hazptr_domain {
} }
} }
hazptr_rec<Atom>* head() const noexcept { Rec* head() const noexcept {
return hazptrs_.load(std::memory_order_acquire); return hazptrs_.load(std::memory_order_acquire);
} }
...@@ -555,8 +593,7 @@ class hazptr_domain { ...@@ -555,8 +593,7 @@ class hazptr_domain {
auto rec = head(); auto rec = head();
while (rec) { while (rec) {
auto next = rec->next(); auto next = rec->next();
DCHECK(!rec->active()); rec->~Rec();
rec->~hazptr_rec<Atom>();
hazptr_rec_alloc{}.deallocate(rec, 1); hazptr_rec_alloc{}.deallocate(rec, 1);
rec = next; rec = next;
} }
...@@ -568,22 +605,92 @@ class hazptr_domain { ...@@ -568,22 +605,92 @@ class hazptr_domain {
} }
} }
hazptr_rec<Atom>* try_acquire_existing_hprec() { std::pair<uint8_t, Rec*> try_pop_available_hprecs(uint8_t num) {
auto rec = head(); DCHECK_GE(num, 1);
while (true) {
uintptr_t avail = load_avail();
if (avail == reinterpret_cast<uintptr_t>(nullptr)) {
return {0, nullptr};
}
if ((avail & kLockBit) == 0) {
// Try to lock avail list
if (cas_avail(avail, avail | kLockBit)) {
// Lock acquired
Rec* head = reinterpret_cast<Rec*>(avail);
uint8_t nn = pop_available_hprecs_release_lock(num, head);
// Lock released
DCHECK_GE(nn, 1);
DCHECK_LE(nn, num);
return {nn, head};
}
} else {
std::this_thread::yield();
}
}
}
uint8_t pop_available_hprecs_release_lock(uint8_t num, Rec* head) {
// Lock already acquired
DCHECK_GE(num, 1);
DCHECK(head);
Rec* tail = head;
uint8_t nn = 1;
Rec* next = tail->next_avail();
while ((next != nullptr) && (nn < num)) {
DCHECK_EQ(reinterpret_cast<uintptr_t>(next) & kLockBit, 0);
tail = next;
next = tail->next_avail();
++nn;
}
uintptr_t newval = reinterpret_cast<uintptr_t>(next);
DCHECK_EQ(newval & kLockBit, 0);
// Release lock
store_avail(newval);
tail->set_next_avail(nullptr);
return nn;
}
void push_available_hprecs(Rec* head, Rec* tail) {
DCHECK(head);
DCHECK(tail);
DCHECK(tail->next_avail() == nullptr);
if (kIsDebug) {
dcheck_connected(head, tail);
}
uintptr_t newval = reinterpret_cast<uintptr_t>(head);
DCHECK_EQ(newval & kLockBit, 0);
while (true) {
uintptr_t avail = load_avail();
if ((avail & kLockBit) == 0) {
// Try to push if unlocked
auto next = reinterpret_cast<Rec*>(avail);
tail->set_next_avail(next);
if (cas_avail(avail, newval)) {
break;
}
} else {
std::this_thread::yield();
}
}
}
void dcheck_connected(Rec* head, Rec* tail) {
Rec* rec = head;
bool connected = false;
while (rec) { while (rec) {
auto next = rec->next(); Rec* next = rec->next_avail();
if (rec->try_acquire()) { if (rec == tail) {
return rec; connected = true;
DCHECK(next == nullptr);
} }
rec = next; rec = next;
} }
return nullptr; DCHECK(connected);
} }
hazptr_rec<Atom>* acquire_new_hprec() { Rec* create_new_hprec() {
auto rec = hazptr_rec_alloc{}.allocate(1); auto rec = hazptr_rec_alloc{}.allocate(1);
new (rec) hazptr_rec<Atom>(); new (rec) Rec();
rec->set_active();
rec->set_domain(this); rec->set_domain(this);
while (true) { while (true) {
auto h = head(); auto h = head();
......
...@@ -89,7 +89,7 @@ class hazptr_holder { ...@@ -89,7 +89,7 @@ class hazptr_holder {
} }
} }
#endif #endif
domain->hprec_release(hprec_); domain->release_hprec(hprec_);
} }
} }
...@@ -189,7 +189,9 @@ FOLLY_ALWAYS_INLINE hazptr_holder<Atom> make_hazard_pointer( ...@@ -189,7 +189,9 @@ FOLLY_ALWAYS_INLINE hazptr_holder<Atom> make_hazard_pointer(
} }
} }
#endif #endif
auto hprec = domain.hprec_acquire(); auto hprec = domain.acquire_hprecs(1);
DCHECK(hprec);
DCHECK(hprec->next_avail() == nullptr);
return hazptr_holder<Atom>(hprec); return hazptr_holder<Atom>(hprec);
} }
...@@ -331,9 +333,15 @@ FOLLY_ALWAYS_INLINE hazptr_array<M, Atom> make_hazard_pointer_array() { ...@@ -331,9 +333,15 @@ FOLLY_ALWAYS_INLINE hazptr_array<M, Atom> make_hazard_pointer_array() {
} }
tc.set_count(offset); tc.set_count(offset);
#else #else
auto hprec = hazard_pointer_default_domain<Atom>().acquire_hprecs(M);
for (uint8_t i = 0; i < M; ++i) { for (uint8_t i = 0; i < M; ++i) {
new (&h[i]) hazptr_holder<Atom>(make_hazard_pointer<Atom>()); DCHECK(hprec);
auto next = hprec->next_avail();
hprec->set_next_avail(nullptr);
new (&h[i]) hazptr_holder<Atom>(hprec);
hprec = next;
} }
DCHECK(hprec == nullptr);
#endif #endif
a.empty_ = false; a.empty_ = false;
return a; return a;
......
...@@ -32,12 +32,17 @@ template <template <typename> class Atom> ...@@ -32,12 +32,17 @@ template <template <typename> class Atom>
class alignas(hardware_destructive_interference_size) hazptr_rec { class alignas(hardware_destructive_interference_size) hazptr_rec {
Atom<const void*> hazptr_{nullptr}; // the hazard pointer Atom<const void*> hazptr_{nullptr}; // the hazard pointer
hazptr_domain<Atom>* domain_; hazptr_domain<Atom>* domain_;
hazptr_rec* next_; hazptr_rec* next_; // Next in the main hazard pointer list. Immutable.
Atom<bool> active_{false}; hazptr_rec* nextAvail_{nullptr}; // Next available hazard pointer.
friend class hazptr_domain<Atom>; friend class hazptr_domain<Atom>;
friend class hazptr_holder<Atom>; friend class hazptr_holder<Atom>;
friend class hazptr_tc_entry<Atom>; #if FOLLY_HAZPTR_THR_LOCAL
friend class hazptr_tc<Atom>;
#endif
friend hazptr_holder<Atom> make_hazard_pointer<Atom>(hazptr_domain<Atom>&);
template <uint8_t M, template <typename> class A>
friend hazptr_array<M, A> make_hazard_pointer_array();
const void* hazptr() const noexcept { const void* hazptr() const noexcept {
return hazptr_.load(std::memory_order_acquire); return hazptr_.load(std::memory_order_acquire);
...@@ -47,25 +52,14 @@ class alignas(hardware_destructive_interference_size) hazptr_rec { ...@@ -47,25 +52,14 @@ class alignas(hardware_destructive_interference_size) hazptr_rec {
hazptr_.store(p, std::memory_order_release); hazptr_.store(p, std::memory_order_release);
} }
bool active() const noexcept {
return active_.load(std::memory_order_acquire);
}
void set_active() noexcept { active_.store(true, std::memory_order_relaxed); }
bool try_acquire() noexcept {
bool a = active();
return !a &&
active_.compare_exchange_strong(
a, true, std::memory_order_release, std::memory_order_relaxed);
}
void release() noexcept { active_.store(false, std::memory_order_release); }
hazptr_rec<Atom>* next() { return next_; } hazptr_rec<Atom>* next() { return next_; }
hazptr_rec<Atom>* next_avail() { return nextAvail_; }
void set_next(hazptr_rec<Atom>* rec) { next_ = rec; } void set_next(hazptr_rec<Atom>* rec) { next_ = rec; }
void set_next_avail(hazptr_rec<Atom>* rec) { nextAvail_ = rec; }
FOLLY_ALWAYS_INLINE hazptr_domain<Atom>* domain() { return domain_; } FOLLY_ALWAYS_INLINE hazptr_domain<Atom>* domain() { return domain_; }
void set_domain(hazptr_domain<Atom>* dom) { domain_ = dom; } void set_domain(hazptr_domain<Atom>* dom) { domain_ = dom; }
......
...@@ -56,8 +56,6 @@ class hazptr_tc_entry { ...@@ -56,8 +56,6 @@ class hazptr_tc_entry {
} }
FOLLY_ALWAYS_INLINE hazptr_rec<Atom>* get() const noexcept { return hprec_; } FOLLY_ALWAYS_INLINE hazptr_rec<Atom>* get() const noexcept { return hprec_; }
void evict() { hprec_->release(); }
}; // hazptr_tc_entry }; // hazptr_tc_entry
/** /**
...@@ -74,15 +72,13 @@ class hazptr_tc { ...@@ -74,15 +72,13 @@ class hazptr_tc {
bool local_{false}; // for debug mode only bool local_{false}; // for debug mode only
public: public:
~hazptr_tc() { ~hazptr_tc() { evict(count()); }
for (uint8_t i = 0; i < count(); ++i) {
entry_[i].evict();
}
}
static constexpr uint8_t capacity() noexcept { return kCapacity; } static constexpr uint8_t capacity() noexcept { return kCapacity; }
private: private:
using Rec = hazptr_rec<Atom>;
template <uint8_t, template <typename> class> template <uint8_t, template <typename> class>
friend class hazptr_array; friend class hazptr_array;
friend class hazptr_holder<Atom>; friend class hazptr_holder<Atom>;
...@@ -122,17 +118,37 @@ class hazptr_tc { ...@@ -122,17 +118,37 @@ class hazptr_tc {
FOLLY_NOINLINE void fill(uint8_t num) { FOLLY_NOINLINE void fill(uint8_t num) {
DCHECK_LE(count_ + num, capacity()); DCHECK_LE(count_ + num, capacity());
auto& domain = default_hazptr_domain<Atom>(); auto& domain = default_hazptr_domain<Atom>();
Rec* hprec = domain.acquire_hprecs(num);
for (uint8_t i = 0; i < num; ++i) { for (uint8_t i = 0; i < num; ++i) {
auto hprec = domain.hprec_acquire(); DCHECK(hprec);
Rec* next = hprec->next_avail();
hprec->set_next_avail(nullptr);
entry_[count_++].fill(hprec); entry_[count_++].fill(hprec);
hprec = next;
} }
DCHECK(hprec == nullptr);
} }
FOLLY_NOINLINE void evict(uint8_t num) { FOLLY_NOINLINE void evict(uint8_t num) {
DCHECK_GE(count_, num); DCHECK_GE(count_, num);
if (num == 0) {
return;
}
Rec* head = nullptr;
Rec* tail = nullptr;
for (uint8_t i = 0; i < num; ++i) { for (uint8_t i = 0; i < num; ++i) {
entry_[--count_].evict(); Rec* rec = entry_[--count_].get();
DCHECK(rec);
rec->set_next_avail(head);
head = rec;
if (!tail) {
tail = rec;
}
} }
DCHECK(head);
DCHECK(tail);
DCHECK(tail->next_avail() == nullptr);
hazard_pointer_default_domain<Atom>().release_hprecs(head, tail);
} }
void evict() { evict(count()); } void evict() { evict(count()); }
......
...@@ -1530,7 +1530,7 @@ uint64_t tc_miss_bench(std::string name, int nthreads) { ...@@ -1530,7 +1530,7 @@ uint64_t tc_miss_bench(std::string name, int nthreads) {
auto repFn = [&] { auto repFn = [&] {
auto init = [] {}; auto init = [] {};
auto fn = [&](int tid) { auto fn = [&](int tid) {
for (int j = tid; j < ops / 1000; j += nthreads) { for (int j = tid; j < ops; j += nthreads) {
// By using twice the TC capacity, each iteration does one // By using twice the TC capacity, each iteration does one
// filling and one eviction of the TC. // filling and one eviction of the TC.
hazptr_array<C> a1 = make_hazard_pointer_array<C>(); hazptr_array<C> a1 = make_hazard_pointer_array<C>();
...@@ -1567,7 +1567,7 @@ void benches() { ...@@ -1567,7 +1567,7 @@ void benches() {
local_bench<3>("", i); local_bench<3>("", i);
std::cout << "10x construct/destruct hazptr_array<9> "; std::cout << "10x construct/destruct hazptr_array<9> ";
array_bench<9>("", i); array_bench<9>("", i);
std::cout << "1/1000 TC hit + miss & overflow "; std::cout << "TC hit + miss & overflow ";
tc_miss_bench("", i); tc_miss_bench("", i);
std::cout << "allocate/retire/reclaim object "; std::cout << "allocate/retire/reclaim object ";
obj_bench("", i); obj_bench("", i);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment