Commit 1d83c51f authored by Maged Michael's avatar Maged Michael Committed by Facebook Github Bot

Add thread caching of hazard pointers. Benchmarks. Minor fixes, optimizations, and refactoring.

Summary:
Added support for thread caching of hazard pointers.
Added thread caching benchmarks.
Removed function call from hazptr_domain constexpr constructor.
Optimizations of memory order and code refactoring.

Reviewed By: davidtgoldblatt

Differential Revision: D5249070

fbshipit-source-id: 487fb23abccde228c3c726de4ac8e9f07bfa9498
parent 4ecd9abf
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define HAZPTR_AMB true
#define HAZPTR_TC false
#include <folly/experimental/hazptr/bench/HazptrBench.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
using namespace folly::hazptr;
int nthreads;
int size;
BENCHMARK(amb, iters) {
run_once(nthreads, size, iters);
}
BENCHMARK(amb_dup, iters) {
run_once(nthreads, size, iters);
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "---------------------------------------------- AMB - No TC\n";
for (int i : nthr) {
nthreads = i;
for (int j : sizes) {
size = j;
std::cout << i << " threads -- " << j << "-item list" << std::endl;
bench("amb - no tc ", i, j, 0);
bench("amb - no tc - dup ", i, j, 0);
}
}
std::cout << "----------------------------------------------------------\n";
}
......@@ -15,6 +15,7 @@
*/
#define HAZPTR_AMB true
#define HAZPTR_TC true
#include <folly/experimental/hazptr/bench/HazptrBench.h>
#include <folly/portability/GFlags.h>
......@@ -36,14 +37,14 @@ BENCHMARK(amb_dup, iters) {
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "------------------------------------------------------ AMB\n";
std::cout << "------------------------------------------------- AMB - TC\n";
for (int i : nthr) {
nthreads = i;
for (int j : sizes) {
size = j;
std::cout << i << " threads -- " << j << "-item list" << std::endl;
bench("amb ", i, j, 0);
bench("amb - dup ", i, j, 0);
bench("amb - tc ", i, j, 0);
bench("amb - tc - dup ", i, j, 0);
}
}
std::cout << "----------------------------------------------------------\n";
......
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define HAZPTR_AMB false
#define HAZPTR_TC false
#include <folly/experimental/hazptr/bench/HazptrBench.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
using namespace folly::hazptr;
int nthreads;
int size;
BENCHMARK(no_amb, iters) {
run_once(nthreads, size, iters);
}
BENCHMARK(no_amb_dup, iters) {
run_once(nthreads, size, iters);
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "------------------------------------------- No AMB - No Tc\n";
for (int i : nthr) {
nthreads = i;
for (int j : sizes) {
size = j;
std::cout << i << " threads -- " << j << "-item list" << std::endl;
bench("no amb - no tc ", i, j, 0);
bench("no amb - no tc - dup ", i, j, 0);
}
}
std::cout << "----------------------------------------------------------\n";
}
......@@ -15,6 +15,7 @@
*/
#define HAZPTR_AMB false
#define HAZPTR_TC true
#include <folly/experimental/hazptr/bench/HazptrBench.h>
#include <folly/portability/GFlags.h>
......@@ -36,14 +37,14 @@ BENCHMARK(no_amb_dup, iters) {
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "--------------------------------------------------- No AMB\n";
std::cout << "---------------------------------------------- No AMB - TC\n";
for (int i : nthr) {
nthreads = i;
for (int j : sizes) {
size = j;
std::cout << i << " threads -- " << j << "-item list" << std::endl;
bench("no amb ", i, j, 0);
bench("no amb - dup ", i, j, 0);
bench("no amb - tc ", i, j, 0);
bench("no amb - tc - dup ", i, j, 0);
}
}
std::cout << "----------------------------------------------------------\n";
......
......@@ -33,6 +33,8 @@ inline uint64_t run_once(int nthreads, int size, int ops) {
std::atomic<bool> start{false};
std::atomic<int> started{0};
hazptr_owner<void> dummy_hptr[100];
for (int i = 0; i < size; ++i) {
s.add(i);
}
......@@ -107,32 +109,60 @@ const std::string header = "Test_name, Max time, Avg time, Min time";
} // namespace hazptr {
/*
--------------------------------------------------- No AMB
------------------------------------------- No AMB - No Tc
1 threads -- 10-item list
no amb - no tc 756 ns 688 ns 674 ns
no amb - no tc - dup 725 ns 688 ns 676 ns
1 threads -- 100-item list
no amb - no tc 2469 ns 2366 ns 2334 ns
no amb - no tc - dup 2404 ns 2353 ns 2328 ns
10 threads -- 10-item list
no amb - no tc 802 ns 764 ns 750 ns
no amb - no tc - dup 798 ns 776 ns 733 ns
10 threads -- 100-item list
no amb - no tc 2209 ns 2157 ns 2118 ns
no amb - no tc - dup 2266 ns 2152 ns 1993 ns
----------------------------------------------------------
---------------------------------------------- AMB - No TC
1 threads -- 10-item list
amb - no tc 554 ns 538 ns 525 ns
amb - no tc - dup 540 ns 530 ns 524 ns
1 threads -- 100-item list
amb - no tc 731 ns 721 ns 715 ns
amb - no tc - dup 745 ns 724 ns 714 ns
10 threads -- 10-item list
amb - no tc 777 ns 717 ns 676 ns
amb - no tc - dup 726 ns 669 ns 638 ns
10 threads -- 100-item list
amb - no tc 1015 ns 985 ns 955 ns
amb - no tc - dup 1000 ns 978 ns 952 ns
----------------------------------------------------------
---------------------------------------------- No AMB - TC
1 threads -- 10-item list
no amb 210 ns 204 ns 202 ns
no amb - dup 213 ns 207 ns 203 ns
no amb - tc 209 ns 203 ns 199 ns
no amb - tc - dup 210 ns 202 ns 196 ns
1 threads -- 100-item list
no amb 1862 ns 1810 ns 1778 ns
no amb - dup 1791 ns 1785 ns 1777 ns
no amb - tc 1872 ns 1849 ns 1840 ns
no amb - tc - dup 1902 ns 1865 ns 1838 ns
10 threads -- 10-item list
no amb 227 ns 161 ns 143 ns
no amb - dup 145 ns 144 ns 143 ns
no amb - tc 136 ns 50 ns 23 ns
no amb - tc - dup 178 ns 85 ns 23 ns
10 threads -- 100-item list
no amb 520 ns 518 ns 515 ns
no amb - dup 684 ns 536 ns 516 ns
no amb - tc 1594 ns 651 ns 201 ns
no amb - tc - dup 1492 ns 615 ns 203 ns
----------------------------------------------------------
------------------------------------------------------ AMB
------------------------------------------------- AMB - TC
1 threads -- 10-item list
amb 48 ns 46 ns 45 ns
amb - dup 47 ns 45 ns 45 ns
amb - tc 45 ns 44 ns 44 ns
amb - tc - dup 46 ns 46 ns 45 ns
1 threads -- 100-item list
amb 242 ns 236 ns 234 ns
amb - dup 243 ns 238 ns 234 ns
amb - tc 256 ns 246 ns 240 ns
amb - tc - dup 242 ns 240 ns 238 ns
10 threads -- 10-item list
amb 226 ns 130 ns 109 ns
amb - dup 161 ns 115 ns 109 ns
amb - tc 120 ns 35 ns 13 ns
amb - tc - dup 104 ns 34 ns 9 ns
10 threads -- 100-item list
amb 192 ns 192 ns 191 ns
amb - dup 416 ns 324 ns 192 ns
amb - tc 267 ns 129 ns 49 ns
amb - tc - dup 766 ns 147 ns 42 ns
----------------------------------------------------------
*/
......@@ -57,11 +57,11 @@ class SWMRListSet {
/* Used by the single writer */
void locate_lower_bound(const T& v, std::atomic<Node*>*& prev) const {
auto curr = prev->load();
auto curr = prev->load(std::memory_order_relaxed);
while (curr) {
if (curr->elem_ >= v) break;
prev = &(curr->next_);
curr = curr->next_.load();
curr = curr->next_.load(std::memory_order_relaxed);
}
return;
}
......@@ -81,7 +81,7 @@ class SWMRListSet {
bool add(T v) {
auto prev = &head_;
locate_lower_bound(v, prev);
auto curr = prev->load();
auto curr = prev->load(std::memory_order_relaxed);
if (curr && curr->elem_ == v) return false;
prev->store(new Node(std::move(v), curr));
return true;
......@@ -90,11 +90,13 @@ class SWMRListSet {
bool remove(const T& v) {
auto prev = &head_;
locate_lower_bound(v, prev);
auto curr = prev->load();
auto curr = prev->load(std::memory_order_relaxed);
if (!curr || curr->elem_ != v) return false;
Node *curr_next = curr->next_.load();
prev->store(curr_next); // Patch up the actual list...
curr->next_.store(nullptr); // ...and only then null out the removed node.
// Patch up the actual list...
prev->store(curr_next, std::memory_order_release);
// ...and only then null out the removed node.
curr->next_.store(nullptr, std::memory_order_release);
curr->retire(domain_);
return true;
}
......@@ -105,13 +107,14 @@ class SWMRListSet {
hazptr_owner<Node> hptr_curr(domain_);
while (true) {
auto prev = &head_;
auto curr = prev->load();
auto curr = prev->load(std::memory_order_acquire);
while (true) {
if (!curr) { return false; }
if (!hptr_curr.try_protect(curr, *prev))
break;
auto next = curr->next_.load();
if (prev->load() != curr) break;
auto next = curr->next_.load(std::memory_order_acquire);
if (prev->load(std::memory_order_acquire) != curr)
break;
if (curr->elem_ == val) {
return true;
} else if (!(curr->elem_ < val)) {
......
......@@ -29,6 +29,14 @@
#define HAZPTR_AMB true
#endif
#ifndef HAZPTR_TC
#define HAZPTR_TC true
#endif
#ifndef HAZPTR_TC_SIZE
#define HAZPTR_TC_SIZE 2
#endif
#ifndef HAZPTR_SCAN_MULT
#define HAZPTR_SCAN_MULT 2
#endif
......@@ -45,7 +53,8 @@
#include <folly/experimental/AsymmetricMemoryBarrier.h>
#include <folly/experimental/hazptr/debug.h>
#include <unordered_set>
#include <mutex> // for thread caching
#include <unordered_set> // for hash set in bulk reclamation
namespace folly {
namespace hazptr {
......@@ -67,15 +76,44 @@ class hazptr_mb {
static void heavy();
};
/** hazptr_tc */
class hazptr_tc_entry {
friend class hazptr_tc;
std::atomic<hazptr_rec*> hprec_{nullptr};
std::atomic<hazptr_domain*> domain_{nullptr};
public:
void fill(hazptr_rec* hprec, hazptr_domain* domain);
hazptr_rec* get(hazptr_domain* domain);
void invalidate(hazptr_rec* hprec);
void evict();
};
class hazptr_tc {
hazptr_tc_entry tc_[HAZPTR_TC_SIZE];
public:
hazptr_tc();
~hazptr_tc();
hazptr_rec* get(hazptr_domain* domain);
bool put(hazptr_rec* hprec, hazptr_domain* domain);
};
hazptr_tc& hazptr_tc();
std::mutex& hazptr_tc_lock();
using hazptr_tc_guard = std::lock_guard<std::mutex>;
/**
* public functions
*/
/** hazptr_domain */
constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept : mr_(mr) {
hazptr_stats();
}
constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept
: mr_(mr) {}
/** hazptr_obj_base */
......@@ -95,15 +133,19 @@ inline void hazptr_obj_base<T, D>::retire(hazptr_domain& domain, D deleter) {
class hazptr_rec {
friend class hazptr_domain;
friend class hazptr_tc_entry;
template <typename> friend class hazptr_owner;
std::atomic<const void*> hazptr_ = {nullptr};
hazptr_rec* next_ = {nullptr};
std::atomic<bool> active_ = {false};
std::atomic<const void*> hazptr_{nullptr};
hazptr_rec* next_{nullptr};
std::atomic<hazptr_tc_entry*> tc_{nullptr};
std::atomic<bool> active_{false};
void set(const void* p) noexcept;
const void* get() const noexcept;
void clear() noexcept;
bool isActive() noexcept;
bool tryAcquire() noexcept;
void release() noexcept;
};
......@@ -182,9 +224,9 @@ inline void swap(hazptr_owner<T>& lhs, hazptr_owner<T>& rhs) noexcept {
////////////////////////////////////////////////////////////////////////////////
// [TODO]:
// - Thread caching of hazptr_rec-s
// - Private storage of retired objects
// - Control of reclamation (when and by whom)
// - End-to-end lock-free implementation
/** Definition of default_hazptr_domain() */
inline hazptr_domain& default_hazptr_domain() {
......@@ -211,6 +253,21 @@ inline void hazptr_rec::clear() noexcept {
hazptr_.store(nullptr, std::memory_order_release);
}
inline bool hazptr_rec::isActive() noexcept {
return active_.load(std::memory_order_acquire);
}
inline bool hazptr_rec::tryAcquire() noexcept {
bool active = isActive();
if (!active &&
active_.compare_exchange_strong(
active, true, std::memory_order_release, std::memory_order_relaxed)) {
DEBUG_PRINT(this);
return true;
}
return false;
}
inline void hazptr_rec::release() noexcept {
DEBUG_PRINT(this);
clear();
......@@ -243,49 +300,56 @@ inline hazptr_domain::~hazptr_domain() {
hazptr_rec* next;
for (auto p = hazptrs_.load(std::memory_order_acquire); p; p = next) {
next = p->next_;
if (HAZPTR_TC) {
if (p->isActive()) {
hazptr_tc_guard g(hazptr_tc_lock());
if (p->isActive()) {
auto tc = p->tc_.load(std::memory_order_acquire);
DCHECK(tc != nullptr);
tc->invalidate(p);
}
}
} else {
CHECK(!p->isActive());
}
mr_->deallocate(static_cast<void*>(p), sizeof(hazptr_rec));
}
}
}
inline hazptr_rec* hazptr_domain::hazptrAcquire() {
if (HAZPTR_TC) {
hazptr_rec* hprec = hazptr_tc().get(this);
if (hprec) {
return hprec;
}
}
hazptr_rec* p;
hazptr_rec* next;
for (p = hazptrs_.load(std::memory_order_acquire); p; p = next) {
next = p->next_;
bool active = p->active_.load(std::memory_order_acquire);
if (!active) {
if (p->active_.compare_exchange_weak(
active,
true,
std::memory_order_release,
std::memory_order_relaxed)) {
DEBUG_PRINT(this << " " << p);
if (p->tryAcquire()) {
return p;
}
}
}
p = static_cast<hazptr_rec*>(mr_->allocate(sizeof(hazptr_rec)));
if (p == nullptr) {
return nullptr;
}
p->active_.store(true, std::memory_order_relaxed);
p->next_ = hazptrs_.load(std::memory_order_acquire);
do {
if (hazptrs_.compare_exchange_weak(
p->next_,
p,
std::memory_order_release,
std::memory_order_acquire)) {
break;
}
} while (true);
while (!hazptrs_.compare_exchange_weak(
p->next_, p, std::memory_order_release, std::memory_order_acquire))
/* keep trying */;
auto hcount = hcount_.fetch_add(1);
DEBUG_PRINT(this << " " << p << " " << sizeof(hazptr_rec) << " " << hcount);
return p;
}
inline void hazptr_domain::hazptrRelease(hazptr_rec* p) noexcept {
if (HAZPTR_TC && hazptr_tc().put(p, this)) {
return;
}
DEBUG_PRINT(this << " " << p);
p->release();
}
......@@ -334,7 +398,7 @@ inline void hazptr_domain::bulkReclaim() {
auto h = hazptrs_.load(std::memory_order_acquire);
std::unordered_set<const void*> hs; // TODO lock-free alternative
for (; h; h = h->next_) {
hs.insert(h->hazptr_.load(std::memory_order_relaxed));
hs.insert(h->get());
}
int rcount = 0;
hazptr_obj* retired = nullptr;
......@@ -428,5 +492,103 @@ inline void hazptr_mb::heavy() {
}
}
/** hazptr_tc - functions */
inline void hazptr_tc_entry::fill(hazptr_rec* hprec, hazptr_domain* domain) {
hprec_.store(hprec, std::memory_order_release);
domain_.store(domain, std::memory_order_release);
hprec->tc_.store(this, std::memory_order_release);
DEBUG_PRINT(this << " " << domain << " " << hprec);
}
inline hazptr_rec* hazptr_tc_entry::get(hazptr_domain* domain) {
if (domain == domain_.load(std::memory_order_acquire)) {
auto hprec = hprec_.load(std::memory_order_acquire);
if (hprec) {
hprec_.store(nullptr, std::memory_order_release);
DEBUG_PRINT(this << " " << domain << " " << hprec);
return hprec;
}
}
return nullptr;
}
inline void hazptr_tc_entry::invalidate(hazptr_rec* hprec) {
DCHECK_EQ(hprec, hprec_);
hprec_.store(nullptr, std::memory_order_release);
domain_.store(nullptr, std::memory_order_release);
hprec->tc_.store(nullptr, std::memory_order_relaxed);
hprec->release();
DEBUG_PRINT(this << " " << hprec);
}
inline void hazptr_tc_entry::evict() {
auto hprec = hprec_.load(std::memory_order_relaxed);
if (hprec) {
hazptr_tc_guard g(hazptr_tc_lock());
hprec = hprec_.load(std::memory_order_relaxed);
if (hprec) {
invalidate(hprec);
}
}
DEBUG_PRINT(hprec);
}
inline hazptr_tc::hazptr_tc() {
DEBUG_PRINT(this);
}
inline hazptr_tc::~hazptr_tc() {
DEBUG_PRINT(this);
for (int i = 0; i < HAZPTR_TC_SIZE; ++i) {
tc_[i].evict();
}
}
inline hazptr_rec* hazptr_tc::get(hazptr_domain* domain) {
for (int i = 0; i < HAZPTR_TC_SIZE; ++i) {
auto hprec = tc_[i].get(domain);
if (hprec) {
DEBUG_PRINT(this << " " << domain << " " << hprec);
return hprec;
}
}
DEBUG_PRINT(this << " " << domain << " nullptr");
return nullptr;
}
inline bool hazptr_tc::put(hazptr_rec* hprec, hazptr_domain* domain) {
int other = HAZPTR_TC_SIZE;
for (int i = 0; i < HAZPTR_TC_SIZE; ++i) {
if (tc_[i].hprec_.load(std::memory_order_acquire) == nullptr) {
tc_[i].fill(hprec, domain);
DEBUG_PRINT(this << " " << i);
return true;
}
if (tc_[i].domain_.load(std::memory_order_relaxed) != domain) {
other = i;
}
}
if (other < HAZPTR_TC_SIZE) {
tc_[other].evict();
tc_[other].fill(hprec, domain);
DEBUG_PRINT(this << " " << other);
return true;
}
return false;
}
inline class hazptr_tc& hazptr_tc() {
static thread_local class hazptr_tc tc;
DEBUG_PRINT(&tc);
return tc;
}
inline std::mutex& hazptr_tc_lock() {
static std::mutex m;
DEBUG_PRINT(&m);
return m;
}
} // namespace folly
} // namespace hazptr
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment