Commit 3c118217 authored by Maged Michael's avatar Maged Michael Committed by Facebook Github Bot

Hazard pointers: Optimize memory order, add bulk reclamation control, add...

Hazard pointers: Optimize memory order, add bulk reclamation control, add stats, add implementation switches, add benchmarks.

Summary: Make the implementation partially performant by optimizing memory order and using asymmetric memory barrier for full fences. Also added more control for bulk reclamation implementation, stats, and quality of implementation switches.

Reviewed By: djwatson

Differential Revision: D5129614

fbshipit-source-id: c597edf711fdc8f16f80523bd8cae42c51fbe140
parent a88d0d72
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define HAZPTR_AMB true
#include <folly/experimental/hazptr/bench/HazptrBench.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
using namespace folly::hazptr;
int nthreads;
int size;
BENCHMARK(amb, iters) {
run_once(nthreads, size, iters);
}
BENCHMARK(amb_dup, iters) {
run_once(nthreads, size, iters);
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "------------------------------------------------------ AMB\n";
for (int i : nthr) {
nthreads = i;
for (int j : sizes) {
size = j;
std::cout << i << " threads -- " << j << "-item list" << std::endl;
bench("amb ", i, j, 0);
bench("amb - dup ", i, j, 0);
}
}
std::cout << "----------------------------------------------------------\n";
}
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define HAZPTR_AMB false
#include <folly/experimental/hazptr/bench/HazptrBench.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
using namespace folly::hazptr;
int nthreads;
int size;
BENCHMARK(no_amb, iters) {
run_once(nthreads, size, iters);
}
BENCHMARK(no_amb_dup, iters) {
run_once(nthreads, size, iters);
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "--------------------------------------------------- No AMB\n";
for (int i : nthr) {
nthreads = i;
for (int j : sizes) {
size = j;
std::cout << i << " threads -- " << j << "-item list" << std::endl;
bench("no amb ", i, j, 0);
bench("no amb - dup ", i, j, 0);
}
}
std::cout << "----------------------------------------------------------\n";
}
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <folly/Benchmark.h>
#include <folly/experimental/hazptr/example/SWMRList.h>
#include <folly/portability/GTest.h>
#include <glog/logging.h>
#include <atomic>
#include <thread>
namespace folly {
namespace hazptr {
inline uint64_t run_once(int nthreads, int size, int ops) {
folly::BenchmarkSuspender susp;
SWMRListSet<uint64_t> s;
std::atomic<bool> start{false};
std::atomic<int> started{0};
for (int i = 0; i < size; ++i) {
s.add(i);
}
std::vector<std::thread> threads(nthreads);
for (int tid = 0; tid < nthreads; ++tid) {
threads[tid] = std::thread([&, tid] {
started.fetch_add(1);
while (!start.load())
/* spin */;
for (int j = tid; j < ops; j += nthreads) {
s.contains(j);
}
});
}
while (started.load() < nthreads)
/* spin */;
// begin time measurement
auto tbegin = std::chrono::steady_clock::now();
susp.dismiss();
start.store(true);
for (auto& t : threads) {
t.join();
}
susp.rehire();
// end time measurement
uint64_t duration = 0;
auto tend = std::chrono::steady_clock::now();
duration = std::chrono::duration_cast<std::chrono::nanoseconds>(tend - tbegin)
.count();
return duration;
}
inline uint64_t bench(std::string name, int nthreads, int size, uint64_t base) {
int reps = 10;
int ops = 100000;
uint64_t min = UINTMAX_MAX;
uint64_t max = 0;
uint64_t sum = 0;
run_once(nthreads, size, ops); // sometimes first run is outlier
for (int r = 0; r < reps; ++r) {
uint64_t dur = run_once(nthreads, size, ops);
sum += dur;
min = std::min(min, dur);
max = std::max(max, dur);
}
uint64_t avg = sum / reps;
uint64_t res = min;
std::cout << name;
std::cout << " " << std::setw(4) << max / ops << " ns";
std::cout << " " << std::setw(4) << avg / ops << " ns";
std::cout << " " << std::setw(4) << res / ops << " ns";
if (base) {
std::cout << " " << std::setw(3) << 100 * base / res << "%";
}
std::cout << std::endl;
return res;
}
const int nthr[] = {1, 10};
const int sizes[] = {10, 100};
const std::string header = "Test_name, Max time, Avg time, Min time";
} // namespace folly {
} // namespace hazptr {
/*
--------------------------------------------------- No AMB
1 threads -- 10-item list
no amb 210 ns 204 ns 202 ns
no amb - dup 213 ns 207 ns 203 ns
1 threads -- 100-item list
no amb 1862 ns 1810 ns 1778 ns
no amb - dup 1791 ns 1785 ns 1777 ns
10 threads -- 10-item list
no amb 227 ns 161 ns 143 ns
no amb - dup 145 ns 144 ns 143 ns
10 threads -- 100-item list
no amb 520 ns 518 ns 515 ns
no amb - dup 684 ns 536 ns 516 ns
----------------------------------------------------------
------------------------------------------------------ AMB
1 threads -- 10-item list
amb 48 ns 46 ns 45 ns
amb - dup 47 ns 45 ns 45 ns
1 threads -- 100-item list
amb 242 ns 236 ns 234 ns
amb - dup 243 ns 238 ns 234 ns
10 threads -- 10-item list
amb 226 ns 130 ns 109 ns
amb - dup 161 ns 115 ns 109 ns
10 threads -- 100-item list
amb 192 ns 192 ns 191 ns
amb - dup 416 ns 324 ns 192 ns
----------------------------------------------------------
*/
......@@ -17,11 +17,13 @@
#include <glog/logging.h>
#define DO_DEBUG true
#ifndef HAZPTR_DEBUG
#define HAZPTR_DEBUG false
#endif
#define DEBUG_PRINT(...) \
do { \
if (DO_DEBUG) { \
if (HAZPTR_DEBUG) { \
VLOG(2) << __func__ << " --- " << __VA_ARGS__; \
} \
} while (false)
......@@ -19,6 +19,30 @@
#error "This should only be included by hazptr.h"
#endif
/* quality of implementation switches */
// NOTE: The #ifndef pattern is prone to ODR violation. Its use for
// quality of implementation options is temporary. Eventually these
// options should be added to the API in future API extensions.
#ifndef HAZPTR_AMB
#define HAZPTR_AMB true
#endif
#ifndef HAZPTR_SCAN_MULT
#define HAZPTR_SCAN_MULT 2
#endif
#ifndef HAZPTR_SCAN_THRESHOLD
#define HAZPTR_SCAN_THRESHOLD 1000
#endif
/* stats switch */
#ifndef HAZPTR_STATS
#define HAZPTR_STATS false
#endif
#include <folly/experimental/AsymmetricMemoryBarrier.h>
#include <folly/experimental/hazptr/debug.h>
#include <unordered_set>
......@@ -26,10 +50,32 @@
namespace folly {
namespace hazptr {
/**
* helper classes and functions
*/
/** hazptr_stats */
class hazptr_stats;
hazptr_stats& hazptr_stats();
/** hazptr_mb */
class hazptr_mb {
public:
static void light();
static void heavy();
};
/**
* public functions
*/
/** hazptr_domain */
constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept
: mr_(mr) {}
constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept : mr_(mr) {
hazptr_stats();
}
/** hazptr_obj_base */
......@@ -85,7 +131,8 @@ inline bool hazptr_owner<T>::try_protect(T*& ptr, const A& src) noexcept {
"Return type of A::load() must be T*");
DEBUG_PRINT(this << " " << ptr << " " << &src);
set(ptr);
T* p = src.load();
/*** Full fence ***/ hazptr_mb::light();
T* p = src.load(std::memory_order_acquire);
if (p != ptr) {
ptr = p;
clear();
......@@ -100,7 +147,7 @@ inline T* hazptr_owner<T>::get_protected(const A& src) noexcept {
static_assert(
std::is_same<decltype(std::declval<A>().load()), T*>::value,
"Return type of A::load() must be T*");
T* p = src.load();
T* p = src.load(std::memory_order_relaxed);
while (!try_protect(p, src)) {}
DEBUG_PRINT(this << " " << p << " " << &src);
return p;
......@@ -133,14 +180,11 @@ inline void swap(hazptr_owner<T>& lhs, hazptr_owner<T>& rhs) noexcept {
lhs.swap(rhs);
}
////////////////////////////////////////////////////////////////////////////////
// Non-template part of implementation
////////////////////////////////////////////////////////////////////////////////
// [TODO]:
// - Thread caching of hazptr_rec-s
// - Private storage of retired objects
// - Control of reclamation (when and by whom)
// - Optimized memory order
/** Definition of default_hazptr_domain() */
inline hazptr_domain& default_hazptr_domain() {
......@@ -153,23 +197,24 @@ inline hazptr_domain& default_hazptr_domain() {
inline void hazptr_rec::set(const void* p) noexcept {
DEBUG_PRINT(this << " " << p);
hazptr_.store(p);
hazptr_.store(p, std::memory_order_release);
}
inline const void* hazptr_rec::get() const noexcept {
DEBUG_PRINT(this << " " << hazptr_.load());
return hazptr_.load();
auto p = hazptr_.load(std::memory_order_acquire);
DEBUG_PRINT(this << " " << p);
return p;
}
inline void hazptr_rec::clear() noexcept {
DEBUG_PRINT(this);
hazptr_.store(nullptr);
hazptr_.store(nullptr, std::memory_order_release);
}
inline void hazptr_rec::release() noexcept {
DEBUG_PRINT(this);
clear();
active_.store(false);
active_.store(false, std::memory_order_release);
}
/** hazptr_obj */
......@@ -196,27 +241,25 @@ inline hazptr_domain::~hazptr_domain() {
}
{ /* free all hazptr_rec-s */
hazptr_rec* next;
for (auto p = hazptrs_.load(); p; p = next) {
for (auto p = hazptrs_.load(std::memory_order_acquire); p; p = next) {
next = p->next_;
mr_->deallocate(static_cast<void*>(p), sizeof(hazptr_rec));
}
}
}
inline void hazptr_domain::try_reclaim() {
DEBUG_PRINT(this);
rcount_.exchange(0);
bulkReclaim();
}
inline hazptr_rec* hazptr_domain::hazptrAcquire() {
hazptr_rec* p;
hazptr_rec* next;
for (p = hazptrs_.load(); p; p = next) {
for (p = hazptrs_.load(std::memory_order_acquire); p; p = next) {
next = p->next_;
bool active = p->active_.load();
bool active = p->active_.load(std::memory_order_acquire);
if (!active) {
if (p->active_.compare_exchange_weak(active, true)) {
if (p->active_.compare_exchange_weak(
active,
true,
std::memory_order_release,
std::memory_order_relaxed)) {
DEBUG_PRINT(this << " " << p);
return p;
}
......@@ -226,10 +269,14 @@ inline hazptr_rec* hazptr_domain::hazptrAcquire() {
if (p == nullptr) {
return nullptr;
}
p->active_.store(true);
p->active_.store(true, std::memory_order_relaxed);
p->next_ = hazptrs_.load(std::memory_order_acquire);
do {
p->next_ = hazptrs_.load();
if (hazptrs_.compare_exchange_weak(p->next_, p)) {
if (hazptrs_.compare_exchange_weak(
p->next_,
p,
std::memory_order_release,
std::memory_order_acquire)) {
break;
}
} while (true);
......@@ -245,14 +292,21 @@ inline void hazptr_domain::hazptrRelease(hazptr_rec* p) noexcept {
inline int
hazptr_domain::pushRetired(hazptr_obj* head, hazptr_obj* tail, int count) {
tail->next_ = retired_.load();
while (!retired_.compare_exchange_weak(tail->next_, head)) {}
/*** Full fence ***/ hazptr_mb::light();
tail->next_ = retired_.load(std::memory_order_acquire);
while (!retired_.compare_exchange_weak(
tail->next_,
head,
std::memory_order_release,
std::memory_order_acquire)) {
}
return rcount_.fetch_add(count);
}
inline void hazptr_domain::objRetire(hazptr_obj* p) {
auto rcount = pushRetired(p, p, 1) + 1;
if (rcount >= kScanThreshold * hcount_.load()) {
if (rcount >= HAZPTR_SCAN_THRESHOLD &&
rcount >= HAZPTR_SCAN_MULT * hcount_.load(std::memory_order_acquire)) {
tryBulkReclaim();
}
}
......@@ -260,12 +314,13 @@ inline void hazptr_domain::objRetire(hazptr_obj* p) {
inline void hazptr_domain::tryBulkReclaim() {
DEBUG_PRINT(this);
do {
auto hcount = hcount_.load();
auto rcount = rcount_.load();
if (rcount < kScanThreshold * hcount) {
auto hcount = hcount_.load(std::memory_order_acquire);
auto rcount = rcount_.load(std::memory_order_acquire);
if (rcount < HAZPTR_SCAN_THRESHOLD || rcount < HAZPTR_SCAN_MULT * hcount) {
return;
}
if (rcount_.compare_exchange_weak(rcount, 0)) {
if (rcount_.compare_exchange_weak(
rcount, 0, std::memory_order_release, std::memory_order_relaxed)) {
break;
}
} while (true);
......@@ -274,11 +329,12 @@ inline void hazptr_domain::tryBulkReclaim() {
inline void hazptr_domain::bulkReclaim() {
DEBUG_PRINT(this);
auto p = retired_.exchange(nullptr);
auto h = hazptrs_.load();
std::unordered_set<const void*> hs;
/*** Full fence ***/ hazptr_mb::heavy();
auto p = retired_.exchange(nullptr, std::memory_order_acquire);
auto h = hazptrs_.load(std::memory_order_acquire);
std::unordered_set<const void*> hs; // TODO lock-free alternative
for (; h; h = h->next_) {
hs.insert(h->hazptr_.load());
hs.insert(h->hazptr_.load(std::memory_order_relaxed));
}
int rcount = 0;
hazptr_obj* retired = nullptr;
......@@ -303,5 +359,74 @@ inline void hazptr_domain::bulkReclaim() {
}
}
/** hazptr_stats */
class hazptr_stats {
public:
~hazptr_stats();
void light();
void heavy();
void seq_cst();
private:
std::atomic<uint64_t> light_{0};
std::atomic<uint64_t> heavy_{0};
std::atomic<uint64_t> seq_cst_{0};
};
inline hazptr_stats::~hazptr_stats() {
DEBUG_PRINT(this << " light " << light_.load());
DEBUG_PRINT(this << " heavy " << heavy_.load());
DEBUG_PRINT(this << " seq_cst " << seq_cst_.load());
}
inline void hazptr_stats::light() {
if (HAZPTR_STATS) {
/* atomic */ ++light_;
}
}
inline void hazptr_stats::heavy() {
if (HAZPTR_STATS) {
/* atomic */ ++heavy_;
}
}
inline void hazptr_stats::seq_cst() {
if (HAZPTR_STATS) {
/* atomic */ ++seq_cst_;
}
}
inline class hazptr_stats& hazptr_stats() {
static class hazptr_stats stats_;
DEBUG_PRINT(&stats_);
return stats_;
}
/** hazptr_mb */
inline void hazptr_mb::light() {
DEBUG_PRINT("");
if (HAZPTR_AMB) {
folly::asymmetricLightBarrier();
hazptr_stats().light();
} else {
atomic_thread_fence(std::memory_order_seq_cst);
hazptr_stats().seq_cst();
}
}
inline void hazptr_mb::heavy() {
DEBUG_PRINT("");
if (HAZPTR_AMB) {
folly::asymmetricHeavyBarrier();
hazptr_stats().heavy();
} else {
atomic_thread_fence(std::memory_order_seq_cst);
hazptr_stats().seq_cst();
}
}
} // namespace folly
} // namespace hazptr
......@@ -53,10 +53,8 @@ class hazptr_domain {
private:
template <typename, typename>
friend class hazptr_obj_base;
template <typename> friend class hazptr_owner;
/** Constant -- May be changed to parameter in the future */
enum { kScanThreshold = 3 };
template <typename>
friend class hazptr_owner;
memory_resource* mr_;
std::atomic<hazptr_rec*> hazptrs_ = {nullptr};
......@@ -70,7 +68,6 @@ class hazptr_domain {
int pushRetired(hazptr_obj* head, hazptr_obj* tail, int count);
void tryBulkReclaim();
void bulkReclaim();
void try_reclaim();
};
/** Get the default hazptr_domain */
......
......@@ -13,6 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define HAZPTR_DEBUG true
#define HAZPTR_STATS true
#define HAZPTR_SCAN_THRESHOLD 10
#include <folly/experimental/hazptr/test/HazptrUse1.h>
#include <folly/experimental/hazptr/test/HazptrUse2.h>
#include <folly/experimental/hazptr/example/LockFreeLIFO.h>
......@@ -26,7 +30,7 @@
#include <thread>
DEFINE_int32(num_threads, 1, "Number of threads");
DEFINE_int32(num_threads, 5, "Number of threads");
DEFINE_int64(num_reps, 1, "Number of test reps");
DEFINE_int64(num_ops, 10, "Number of ops or pairs of ops per rep");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment