Commit 65180b25 authored by Giuseppe Ottaviano, committed by Facebook GitHub Bot

Reduce memory usage of CoreCachedSharedPtr

Summary:
We only need as many slots as the number of L1 caches.

Also avoid allocating control blocks when the passed pointer has no managed object.
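The second point hinges on shared_ptr's aliasing constructors, which decouple the stored pointer from the managed object. A minimal standalone sketch (plain C++, not part of the commit) of the two corner cases that the new isDefault() helper below has to distinguish:

#include <cassert>
#include <memory>

int main() {
  // Case 1: null stored pointer, but a live managed object.
  auto p1 = std::make_shared<int>(1);
  std::shared_ptr<int> aliasNull(p1, nullptr);
  assert(aliasNull == nullptr && aliasNull.use_count() == 2);

  // Case 2: non-null stored pointer, but no managed object, hence no
  // control block and use_count() == 0.
  int x = 1;
  std::shared_ptr<int> aliasUnmanaged(std::shared_ptr<int>{}, &x);
  assert(aliasUnmanaged != nullptr && aliasUnmanaged.use_count() == 0);

  // Only null *and* use_count() == 0 is equivalent to default-constructed,
  // so only that case can skip control-block allocation.
  return 0;
}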

Reviewed By: philippv, luciang

Differential Revision: D29872059

fbshipit-source-id: 8c221b0523494c44a5c6828bafd26eeb00e573c4
parent 5fbc8492
folly/concurrency/CacheLocality.h

@@ -484,8 +484,9 @@ class CoreRawAllocator {
 };
 
 template <typename T, size_t Stripes>
+FOLLY_EXPORT
 CxxAllocatorAdaptor<T, typename CoreRawAllocator<Stripes>::Allocator>
 getCoreAllocator(size_t stripe) {
   // We cannot make sure that the allocator will be destroyed after
   // all the objects allocated with it, so we leak it.
   static Indestructible<CoreRawAllocator<Stripes>> allocator;
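For orientation, getCoreAllocator() hands out a per-stripe arena, and allocate_shared() places both the object and its control block inside it. A hedged usage sketch (stripe 0 of 64, assuming only the folly headers already in this diff):

#include <memory>
#include <folly/concurrency/CacheLocality.h>

int main() {
  auto p = std::make_shared<int>(1);
  // The Holder is just a shared_ptr copy here; the control block created by
  // allocate_shared lands in stripe 0's arena, away from the cache lines of
  // other stripes.
  auto alloc = folly::getCoreAllocator<std::shared_ptr<int>, 64>(0);
  auto holder = std::allocate_shared<std::shared_ptr<int>>(alloc, p);
  return (holder && *holder) ? 0 : 1;
}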
folly/concurrency/CoreCachedSharedPtr.h

@@ -17,18 +17,56 @@
 #pragma once
 
 #include <array>
+#include <atomic>
 #include <memory>
 
+#include <folly/CppAttributes.h>
 #include <folly/Portability.h>
+#include <folly/Unit.h>
 #include <folly/concurrency/CacheLocality.h>
-#include <folly/container/Enumerate.h>
 #include <folly/synchronization/Hazptr.h>
 
 namespace folly {
 
 // On mobile we do not expect high concurrency, and memory is more important, so
 // use more conservative caching.
-constexpr size_t kCoreCachedSharedPtrDefaultNumSlots = kIsMobile ? 4 : 64;
+constexpr size_t kCoreCachedSharedPtrDefaultMaxSlots = kIsMobile ? 4 : 64;
+
+namespace core_cached_shared_ptr_detail {
+
+template <size_t kMaxSlots>
+class SlotsConfig {
+ public:
+  FOLLY_EXPORT static void initialize() {
+    FOLLY_MAYBE_UNUSED static const Unit _ = [] {
+      // We need at most as many slots as the number of L1 caches, so we can
+      // avoid wasting memory if more slots are requested.
+      const auto l1Caches = CacheLocality::system().numCachesByLevel.front();
+      num_ = std::min(std::max<size_t>(1, l1Caches), kMaxSlots);
+      return unit;
+    }();
+  }
+
+  static size_t num() { return num_.load(std::memory_order_relaxed); }
+
+ private:
+  static std::atomic<size_t> num_;
+};
+
+// Initialize with a valid num so that get() always returns a valid stripe,
+// even if initialize() has not been called yet.
+template <size_t kMaxSlots>
+std::atomic<size_t> SlotsConfig<kMaxSlots>::num_{1};
+
+// Check whether a shared_ptr is equivalent to default-constructed. Because of
+// aliasing constructors, there can be both a nullptr with a managed object,
+// and a non-nullptr with no managed object, so we need to check both.
+template <class T>
+bool isDefault(const std::shared_ptr<T>& p) {
+  return p == nullptr && p.use_count() == 0;
+}
+
+} // namespace core_cached_shared_ptr_detail
 
 /**
  * This class creates core-local caches for a given shared_ptr, to
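As an aside, the SlotsConfig added above relies on a common once-initialization idiom: the initializer of a function-local static runs exactly once even under concurrent callers, and the computed value is published through an atomic that readers load cheaply. A standalone sketch with hypothetical names (no folly dependencies):

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <thread>

template <size_t kMax>
class OnceConfig {
 public:
  static void initialize(size_t detected) {
    // C++11 guarantees thread-safe, exactly-once initialization of a
    // function-local static; the lambda body is the one-time work.
    static const bool initialized = [detected] {
      num_.store(std::min(std::max<size_t>(1, detected), kMax),
                 std::memory_order_relaxed);
      return true;
    }();
    (void)initialized;
  }

  // Readers pay only a relaxed atomic load; before initialize() runs, the
  // default of 1 is still a valid, if conservative, slot count.
  static size_t num() { return num_.load(std::memory_order_relaxed); }

 private:
  static inline std::atomic<size_t> num_{1};
};

int main() {
  OnceConfig<64>::initialize(std::thread::hardware_concurrency());
  return OnceConfig<64>::num() >= 1 ? 0 : 1;
}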
@@ -36,29 +74,36 @@ constexpr size_t kCoreCachedSharedPtrDefaultNumSlots = kIsMobile ? 4 : 64;
  *
  * It has the same thread-safety guarantees as shared_ptr: it is safe
  * to concurrently call get(), but reset()s must be synchronized with
- * reads and other resets().
+ * reads and other reset()s.
  *
  * @author Giuseppe Ottaviano <ott@fb.com>
  */
-template <class T, size_t kNumSlots = kCoreCachedSharedPtrDefaultNumSlots>
+template <class T, size_t kMaxSlots = kCoreCachedSharedPtrDefaultMaxSlots>
 class CoreCachedSharedPtr {
+  using SlotsConfig = core_cached_shared_ptr_detail::SlotsConfig<kMaxSlots>;
+
  public:
   CoreCachedSharedPtr() = default;
   explicit CoreCachedSharedPtr(const std::shared_ptr<T>& p) { reset(p); }
 
   void reset(const std::shared_ptr<T>& p = nullptr) {
+    SlotsConfig::initialize();
     // Allocate each Holder in a different CoreRawAllocator stripe to
     // prevent false sharing. Their control blocks will be adjacent
     // thanks to allocate_shared().
-    for (auto slot : folly::enumerate(slots_)) {
-      auto alloc = getCoreAllocator<Holder, kNumSlots>(slot.index);
-      auto holder = std::allocate_shared<Holder>(alloc, p);
-      *slot = std::shared_ptr<T>(holder, p.get());
+    for (size_t i = 0; i < SlotsConfig::num(); ++i) {
+      // Try freeing the control block before allocating a new one.
+      slots_[i] = {};
+      if (!core_cached_shared_ptr_detail::isDefault(p)) {
+        auto alloc = getCoreAllocator<Holder, kMaxSlots>(i);
+        auto holder = std::allocate_shared<Holder>(alloc, p);
+        slots_[i] = std::shared_ptr<T>(holder, p.get());
+      }
     }
   }
 
   std::shared_ptr<T> get() const {
-    return slots_[AccessSpreader<>::cachedCurrent(kNumSlots)];
+    return slots_[AccessSpreader<>::cachedCurrent(SlotsConfig::num())];
   }
 
  private:
@@ -67,35 +112,38 @@ class CoreCachedSharedPtr {
   template <class, size_t>
   friend class CoreCachedWeakPtr;
 
-  std::array<std::shared_ptr<T>, kNumSlots> slots_;
+  std::array<std::shared_ptr<T>, kMaxSlots> slots_;
 };
 
-template <class T, size_t kNumSlots = kCoreCachedSharedPtrDefaultNumSlots>
+template <class T, size_t kMaxSlots = kCoreCachedSharedPtrDefaultMaxSlots>
 class CoreCachedWeakPtr {
+  using SlotsConfig = core_cached_shared_ptr_detail::SlotsConfig<kMaxSlots>;
+
  public:
   CoreCachedWeakPtr() = default;
-  explicit CoreCachedWeakPtr(const CoreCachedSharedPtr<T, kNumSlots>& p) {
+  explicit CoreCachedWeakPtr(const CoreCachedSharedPtr<T, kMaxSlots>& p) {
     reset(p);
   }
 
   void reset() { *this = {}; }
-  void reset(const CoreCachedSharedPtr<T, kNumSlots>& p) {
-    for (auto slot : folly::enumerate(slots_)) {
-      *slot = p.slots_[slot.index];
+  void reset(const CoreCachedSharedPtr<T, kMaxSlots>& p) {
+    SlotsConfig::initialize();
+    for (size_t i = 0; i < SlotsConfig::num(); ++i) {
+      slots_[i] = p.slots_[i];
     }
   }
 
   std::weak_ptr<T> get() const {
-    return slots_[AccessSpreader<>::cachedCurrent(kNumSlots)];
+    return slots_[AccessSpreader<>::cachedCurrent(SlotsConfig::num())];
   }
 
   // Faster than get().lock(), as it avoids one weak count cycle.
   std::shared_ptr<T> lock() const {
-    return slots_[AccessSpreader<>::cachedCurrent(kNumSlots)].lock();
+    return slots_[AccessSpreader<>::cachedCurrent(SlotsConfig::num())].lock();
  }
 
  private:
-  std::array<std::weak_ptr<T>, kNumSlots> slots_;
+  std::array<std::weak_ptr<T>, kMaxSlots> slots_;
 };
 
 /**
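To make the intended read-mostly usage concrete before the atomic variant below, a hedged sketch (Config is a hypothetical type, not from the commit):

#include <memory>
#include <folly/concurrency/CoreCachedSharedPtr.h>

struct Config {
  int verbosity = 0;
};

folly::CoreCachedSharedPtr<Config> gConfig{std::make_shared<Config>()};
folly::CoreCachedWeakPtr<Config> gWeakConfig{gConfig};

int reader() {
  // get() touches only the calling core's slot, so concurrent readers do not
  // contend on a single refcount cache line.
  std::shared_ptr<Config> c = gConfig.get();
  // lock() on the weak variant skips one weak-count round trip vs get().lock().
  std::shared_ptr<Config> c2 = gWeakConfig.lock();
  return c->verbosity + (c2 ? c2->verbosity : 0);
}

void writer() {
  // Per the docblock, reset() must be externally synchronized with get()s and
  // other reset()s; when that is not possible, use AtomicCoreCachedSharedPtr.
  gConfig.reset(std::make_shared<Config>());
}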
@@ -110,52 +158,53 @@ class CoreCachedWeakPtr {
  * get()s will never see a newer pointer on one core, and an older
  * pointer on another after a subsequent thread migration.
  */
-template <class T, size_t kNumSlots = kCoreCachedSharedPtrDefaultNumSlots>
+template <class T, size_t kMaxSlots = kCoreCachedSharedPtrDefaultMaxSlots>
 class AtomicCoreCachedSharedPtr {
+  using SlotsConfig = core_cached_shared_ptr_detail::SlotsConfig<kMaxSlots>;
+
  public:
-  explicit AtomicCoreCachedSharedPtr(const std::shared_ptr<T>& p = nullptr) {
-    reset(p);
-  }
+  AtomicCoreCachedSharedPtr() = default;
+  explicit AtomicCoreCachedSharedPtr(const std::shared_ptr<T>& p) { reset(p); }
 
   ~AtomicCoreCachedSharedPtr() {
-    auto slots = slots_.load(std::memory_order_acquire);
     // Delete of AtomicCoreCachedSharedPtr must be synchronized, no
     // need for slots->retire().
-    if (slots) {
-      delete slots;
-    }
+    delete slots_.load(std::memory_order_acquire);
   }
 
   void reset(const std::shared_ptr<T>& p = nullptr) {
-    auto newslots = std::make_unique<Slots>();
-    // Allocate each Holder in a different CoreRawAllocator stripe to
-    // prevent false sharing. Their control blocks will be adjacent
-    // thanks to allocate_shared().
-    for (auto slot : folly::enumerate(newslots->slots_)) {
-      auto alloc = getCoreAllocator<Holder, kNumSlots>(slot.index);
-      auto holder = std::allocate_shared<Holder>(alloc, p);
-      *slot = std::shared_ptr<T>(holder, p.get());
+    SlotsConfig::initialize();
+    std::unique_ptr<Slots> newslots;
+    if (!core_cached_shared_ptr_detail::isDefault(p)) {
+      newslots = std::make_unique<Slots>();
+      // Allocate each Holder in a different CoreRawAllocator stripe to
+      // prevent false sharing. Their control blocks will be adjacent
+      // thanks to allocate_shared().
+      for (size_t i = 0; i < SlotsConfig::num(); ++i) {
+        auto alloc = getCoreAllocator<Holder, kMaxSlots>(i);
+        auto holder = std::allocate_shared<Holder>(alloc, p);
+        newslots->slots[i] = std::shared_ptr<T>(holder, p.get());
+      }
     }
 
-    auto oldslots = slots_.exchange(newslots.release());
-    if (oldslots) {
+    if (auto oldslots = slots_.exchange(newslots.release())) {
       oldslots->retire();
     }
   }
 
   std::shared_ptr<T> get() const {
     folly::hazptr_local<1> hazptr;
-    auto slots = hazptr[0].protect(slots_);
-    if (!slots) {
+    if (auto slots = hazptr[0].protect(slots_)) {
+      return slots->slots[AccessSpreader<>::cachedCurrent(SlotsConfig::num())];
+    } else {
       return nullptr;
     }
-    return (slots->slots_)[AccessSpreader<>::cachedCurrent(kNumSlots)];
   }
 
  private:
   using Holder = std::shared_ptr<T>;
 
   struct Slots : folly::hazptr_obj_base<Slots> {
-    std::array<std::shared_ptr<T>, kNumSlots> slots_;
+    std::array<std::shared_ptr<T>, kMaxSlots> slots;
   };
   std::atomic<Slots*> slots_{nullptr};
 };
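The get()/reset() pair above is an instance of the classic hazard-pointer protect/retire protocol. A compact sketch (Node is a hypothetical type; the folly calls used are the same ones that appear in the diff):

#include <atomic>
#include <folly/synchronization/Hazptr.h>

struct Node : folly::hazptr_obj_base<Node> {
  int value = 0;
};

std::atomic<Node*> current{nullptr};

int read() {
  folly::hazptr_local<1> h;
  // protect() loads `current` and shields the object from reclamation for as
  // long as `h` is alive.
  if (auto* n = h[0].protect(current)) {
    return n->value;
  }
  return -1;
}

void update(int v) {
  auto* next = new Node;
  next->value = v;
  if (auto* old = current.exchange(next)) {
    // retire() defers deletion until no hazard pointer protects `old`, so
    // concurrent read()ers never dereference a dangling pointer.
    old->retire();
  }
}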
folly/concurrency/test/CoreCachedSharedPtrTest.cpp

@@ -32,6 +32,22 @@
 #include <folly/experimental/ReadMostlySharedPtr.h>
 #include <folly/portability/GTest.h>
 
+namespace {
+
+template <class Operation>
+void parallelRun(
+    Operation op, size_t numThreads = std::thread::hardware_concurrency()) {
+  std::vector<std::thread> threads;
+  for (size_t t = 0; t < numThreads; ++t) {
+    threads.emplace_back([&, t] { op(t); });
+  }
+  for (auto& t : threads) {
+    t.join();
+  }
+}
+
+} // namespace
+
 TEST(CoreCachedSharedPtr, Basic) {
   auto p = std::make_shared<int>(1);
   std::weak_ptr<int> wp(p);
@@ -45,6 +61,13 @@ TEST(CoreCachedSharedPtr, Basic) {
   ASSERT_EQ(*p2, 1);
   ASSERT_FALSE(wp2.expired());
 
+  // Check that other cores get the correct shared_ptr too.
+  parallelRun([&](size_t) {
+    EXPECT_TRUE(cached.get().get() == p.get());
+    EXPECT_EQ(*cached.get(), 1);
+    EXPECT_EQ(*wcached.lock(), 1);
+  });
+
   p.reset();
   cached.reset();
   // p2 should survive.
@@ -57,11 +80,99 @@ TEST(CoreCachedSharedPtr, Basic) {
   ASSERT_TRUE(wp2.expired());
 }
 
+TEST(CoreCachedSharedPtr, AtomicCoreCachedSharedPtr) {
+  constexpr size_t kIters = 2000;
+  {
+    folly::AtomicCoreCachedSharedPtr<size_t> p;
+    parallelRun([&](size_t) {
+      for (size_t i = 0; i < kIters; ++i) {
+        p.reset(std::make_shared<size_t>(i));
+        EXPECT_TRUE(p.get());
+        // Just read the value, and ensure that ASAN/TSAN do not complain.
+        EXPECT_GE(*p.get(), 0);
+      }
+    });
+  }
+  {
+    // One writer thread, all other readers, verify consistency.
+    std::atomic<size_t> largestValueObserved{0};
+    folly::AtomicCoreCachedSharedPtr<size_t> p{std::make_shared<size_t>(0)};
+    parallelRun([&](size_t t) {
+      if (t == 0) {
+        for (size_t i = 0; i < kIters; ++i) {
+          p.reset(std::make_shared<size_t>(i + 1));
+        }
+      } else {
+        while (true) {
+          auto exp = largestValueObserved.load();
+          auto value = *p.get();
+          EXPECT_GE(value, exp);
+          // Maintain the maximum value observed so far. As soon as one thread
+          // observes an update, they all should observe it.
+          while (value > exp &&
+                 !largestValueObserved.compare_exchange_weak(exp, value)) {
+          }
+          if (exp == kIters) {
+            break;
+          }
+        }
+      }
+    });
+  }
+}
+
+namespace {
+
+template <class Holder>
+void testAliasingCornerCases() {
+  {
+    Holder h;
+    auto p1 = std::make_shared<int>(1);
+    std::weak_ptr<int> w1 = p1;
+    // Aliasing constructor, p2 is nullptr but still manages the object in p1.
+    std::shared_ptr<int> p2(p1, nullptr);
+    // And now it's the only reference left.
+    p1.reset();
+    EXPECT_FALSE(w1.expired());
+    // Pass the ownership to the Holder.
+    h.reset(p2);
+    p2.reset();
+    // Object should still be alive.
+    EXPECT_FALSE(w1.expired());
+    // And resetting will destroy it.
+    h.reset();
+    folly::hazptr_cleanup();
+    EXPECT_TRUE(w1.expired());
+  }
+
+  {
+    Holder h;
+    int x = 1;
+    // p points to x, but has no managed object.
+    std::shared_ptr<int> p(std::shared_ptr<int>{}, &x);
+    h.reset(p);
+    EXPECT_TRUE(h.get().get() == &x);
+  }
+}
+
+} // namespace
+
+TEST(CoreCachedSharedPtr, AliasingCornerCases) {
+  testAliasingCornerCases<folly::CoreCachedSharedPtr<int>>();
+}
+
+TEST(CoreCachedSharedPtr, AliasingCornerCasesAtomic) {
+  testAliasingCornerCases<folly::AtomicCoreCachedSharedPtr<int>>();
+}
+
 namespace {
 
 template <class Operation>
-size_t parallelRun(Operation op, size_t numThreads) {
-  constexpr size_t kIters = 1000 * 1000;
+size_t benchmarkParallelRun(Operation op, size_t numThreads) {
+  constexpr size_t kIters = 2 * 1000 * 1000;
 
   std::vector<std::thread> threads;
@@ -97,47 +208,47 @@ size_t parallelRun(Operation op, size_t numThreads) {
 
 size_t benchmarkSharedPtrAcquire(size_t numThreads) {
   auto p = std::make_shared<int>(1);
-  return parallelRun([&] { return p; }, numThreads);
+  return benchmarkParallelRun([&] { return p; }, numThreads);
 }
 
 size_t benchmarkWeakPtrLock(size_t numThreads) {
   auto p = std::make_shared<int>(1);
   std::weak_ptr<int> wp = p;
-  return parallelRun([&] { return wp.lock(); }, numThreads);
+  return benchmarkParallelRun([&] { return wp.lock(); }, numThreads);
 }
 
 size_t benchmarkAtomicSharedPtrAcquire(size_t numThreads) {
   auto s = std::make_shared<int>(1);
   folly::atomic_shared_ptr<int> p;
   p.store(s);
-  return parallelRun([&] { return p.load(); }, numThreads);
+  return benchmarkParallelRun([&] { return p.load(); }, numThreads);
 }
 
 size_t benchmarkCoreCachedSharedPtrAcquire(size_t numThreads) {
   folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
-  return parallelRun([&] { return p.get(); }, numThreads);
+  return benchmarkParallelRun([&] { return p.get(); }, numThreads);
 }
 
 size_t benchmarkCoreCachedWeakPtrLock(size_t numThreads) {
   folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
   folly::CoreCachedWeakPtr<int> wp(p);
-  return parallelRun([&] { return wp.lock(); }, numThreads);
+  return benchmarkParallelRun([&] { return wp.lock(); }, numThreads);
 }
 
 size_t benchmarkAtomicCoreCachedSharedPtrAcquire(size_t numThreads) {
   folly::AtomicCoreCachedSharedPtr<int> p(std::make_shared<int>(1));
-  return parallelRun([&] { return p.get(); }, numThreads);
+  return benchmarkParallelRun([&] { return p.get(); }, numThreads);
 }
 
 size_t benchmarkReadMostlySharedPtrAcquire(size_t numThreads) {
   folly::ReadMostlyMainPtr<int> p{std::make_shared<int>(1)};
-  return parallelRun([&] { return p.getShared(); }, numThreads);
+  return benchmarkParallelRun([&] { return p.getShared(); }, numThreads);
 }
 
 size_t benchmarkReadMostlyWeakPtrLock(size_t numThreads) {
   folly::ReadMostlyMainPtr<int> p{std::make_shared<int>(1)};
   folly::ReadMostlyWeakPtr<int> w{p};
-  return parallelRun([&] { return w.lock(); }, numThreads);
+  return benchmarkParallelRun([&] { return w.lock(); }, numThreads);
 }
 
 } // namespace
@@ -178,21 +289,21 @@ BENCHMARK_THREADS(64)
 
 BENCHMARK_MULTI(SharedPtrSingleThreadReset) {
   auto p = std::make_shared<int>(1);
-  return parallelRun([&] { p = std::make_shared<int>(1); }, 1);
+  return benchmarkParallelRun([&] { p = std::make_shared<int>(1); }, 1);
 }
 BENCHMARK_MULTI(AtomicSharedPtrSingleThreadReset) {
   auto s = std::make_shared<int>(1);
   folly::atomic_shared_ptr<int> p;
   p.store(s);
-  return parallelRun([&] { p.store(std::make_shared<int>(1)); }, 1);
+  return benchmarkParallelRun([&] { p.store(std::make_shared<int>(1)); }, 1);
 }
 BENCHMARK_MULTI(CoreCachedSharedPtrSingleThreadReset) {
   folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
-  return parallelRun([&] { p.reset(std::make_shared<int>(1)); }, 1);
+  return benchmarkParallelRun([&] { p.reset(std::make_shared<int>(1)); }, 1);
 }
 BENCHMARK_MULTI(AtomicCoreCachedSharedPtrSingleThreadReset) {
   folly::AtomicCoreCachedSharedPtr<int> p(std::make_shared<int>(1));
-  return parallelRun([&] { p.reset(std::make_shared<int>(1)); }, 1);
+  return benchmarkParallelRun([&] { p.reset(std::make_shared<int>(1)); }, 1);
 }
 
 int main(int argc, char** argv) {
@@ -216,63 +327,63 @@ $ buck-out/gen/folly/concurrency/test/core_cached_shared_ptr_test --benchmark --
 ============================================================================
 folly/concurrency/test/CoreCachedSharedPtrTest.cpp relative  time/iter  iters/s
 ============================================================================
-SharedPtrAcquire_1Threads                                   19.89ns   50.28M
-WeakPtrLock_1Threads                                        22.21ns   45.02M
-AtomicSharedPtrAcquire_1Threads                             27.50ns   36.37M
-CoreCachedSharedPtrAquire_1Threads                          19.36ns   51.65M
-CoreCachedWeakPtrLock_1Threads                              22.07ns   45.31M
-AtomicCoreCachedSharedPtrAcquire_1Threads                   22.68ns   44.09M
-ReadMostlySharedPtrAcquire_1Threads                         20.27ns   49.34M
-ReadMostlyWeakPtrLock_1Threads                              20.23ns   49.43M
+SharedPtrAcquire_1Threads                                   18.31ns   54.62M
+WeakPtrLock_1Threads                                        20.07ns   49.83M
+AtomicSharedPtrAcquire_1Threads                             26.31ns   38.01M
+CoreCachedSharedPtrAquire_1Threads                          20.17ns   49.57M
+CoreCachedWeakPtrLock_1Threads                              22.41ns   44.62M
+AtomicCoreCachedSharedPtrAcquire_1Threads                   23.59ns   42.40M
+ReadMostlySharedPtrAcquire_1Threads                         20.16ns   49.61M
+ReadMostlyWeakPtrLock_1Threads                              20.46ns   48.87M
 ----------------------------------------------------------------------------
-SharedPtrAcquire_4Threads                                  187.84ns    5.32M
-WeakPtrLock_4Threads                                       207.78ns    4.81M
-AtomicSharedPtrAcquire_4Threads                            552.59ns    1.81M
-CoreCachedSharedPtrAquire_4Threads                          20.77ns   48.14M
-CoreCachedWeakPtrLock_4Threads                              23.17ns   43.15M
-AtomicCoreCachedSharedPtrAcquire_4Threads                   23.85ns   41.94M
-ReadMostlySharedPtrAcquire_4Threads                         22.20ns   45.05M
-ReadMostlyWeakPtrLock_4Threads                              21.79ns   45.89M
+SharedPtrAcquire_4Threads                                  193.62ns    5.16M
+WeakPtrLock_4Threads                                       216.29ns    4.62M
+AtomicSharedPtrAcquire_4Threads                            508.04ns    1.97M
+CoreCachedSharedPtrAquire_4Threads                          21.52ns   46.46M
+CoreCachedWeakPtrLock_4Threads                              23.80ns   42.01M
+AtomicCoreCachedSharedPtrAcquire_4Threads                   24.81ns   40.30M
+ReadMostlySharedPtrAcquire_4Threads                         21.67ns   46.15M
+ReadMostlyWeakPtrLock_4Threads                              21.72ns   46.04M
 ----------------------------------------------------------------------------
-SharedPtrAcquire_8Threads                                  341.50ns    2.93M
-WeakPtrLock_8Threads                                       463.56ns    2.16M
-AtomicSharedPtrAcquire_8Threads                              1.29us  776.90K
-CoreCachedSharedPtrAquire_8Threads                          20.92ns   47.81M
-CoreCachedWeakPtrLock_8Threads                              23.31ns   42.91M
-AtomicCoreCachedSharedPtrAcquire_8Threads                   23.84ns   41.94M
-ReadMostlySharedPtrAcquire_8Threads                         21.57ns   46.36M
-ReadMostlyWeakPtrLock_8Threads                              21.62ns   46.26M
+SharedPtrAcquire_8Threads                                  389.71ns    2.57M
+WeakPtrLock_8Threads                                       467.29ns    2.14M
+AtomicSharedPtrAcquire_8Threads                              1.38us  727.03K
+CoreCachedSharedPtrAquire_8Threads                          21.49ns   46.53M
+CoreCachedWeakPtrLock_8Threads                              23.83ns   41.97M
+AtomicCoreCachedSharedPtrAcquire_8Threads                   24.68ns   40.52M
+ReadMostlySharedPtrAcquire_8Threads                         21.68ns   46.12M
+ReadMostlyWeakPtrLock_8Threads                              21.48ns   46.55M
 ----------------------------------------------------------------------------
-SharedPtrAcquire_16Threads                                 855.57ns    1.17M
-WeakPtrLock_16Threads                                      898.42ns    1.11M
-AtomicSharedPtrAcquire_16Threads                             3.28us  304.83K
-CoreCachedSharedPtrAquire_16Threads                         22.55ns   44.35M
-CoreCachedWeakPtrLock_16Threads                             24.73ns   40.44M
-AtomicCoreCachedSharedPtrAcquire_16Threads                  26.24ns   38.10M
-ReadMostlySharedPtrAcquire_16Threads                        24.44ns   40.92M
-ReadMostlyWeakPtrLock_16Threads                             24.60ns   40.65M
+SharedPtrAcquire_16Threads                                 739.59ns    1.35M
+WeakPtrLock_16Threads                                      896.23ns    1.12M
+AtomicSharedPtrAcquire_16Threads                             2.88us  347.73K
+CoreCachedSharedPtrAquire_16Threads                         21.98ns   45.50M
+CoreCachedWeakPtrLock_16Threads                             25.98ns   38.49M
+AtomicCoreCachedSharedPtrAcquire_16Threads                  26.44ns   37.82M
+ReadMostlySharedPtrAcquire_16Threads                        23.75ns   42.11M
+ReadMostlyWeakPtrLock_16Threads                             22.89ns   43.70M
 ----------------------------------------------------------------------------
-SharedPtrAcquire_32Threads                                   1.39us  717.39K
-WeakPtrLock_32Threads                                        2.02us  494.63K
-AtomicSharedPtrAcquire_32Threads                             4.97us  201.20K
-CoreCachedSharedPtrAquire_32Threads                         30.78ns   32.49M
-CoreCachedWeakPtrLock_32Threads                             27.89ns   35.85M
-AtomicCoreCachedSharedPtrAcquire_32Threads                  30.56ns   32.72M
-ReadMostlySharedPtrAcquire_32Threads                        29.36ns   34.06M
-ReadMostlyWeakPtrLock_32Threads                             29.63ns   33.75M
+SharedPtrAcquire_32Threads                                   1.36us  732.78K
+WeakPtrLock_32Threads                                        1.93us  518.58K
+AtomicSharedPtrAcquire_32Threads                             5.68us  176.04K
+CoreCachedSharedPtrAquire_32Threads                         29.24ns   34.20M
+CoreCachedWeakPtrLock_32Threads                             32.17ns   31.08M
+AtomicCoreCachedSharedPtrAcquire_32Threads                  28.67ns   34.88M
+ReadMostlySharedPtrAcquire_32Threads                        29.36ns   34.06M
+ReadMostlyWeakPtrLock_32Threads                             27.27ns   36.67M
 ----------------------------------------------------------------------------
-SharedPtrAcquire_64Threads                                   2.49us  402.30K
-WeakPtrLock_64Threads                                        4.57us  218.74K
-AtomicSharedPtrAcquire_64Threads                             9.78us  102.28K
-CoreCachedSharedPtrAquire_64Threads                         48.75ns   20.51M
-CoreCachedWeakPtrLock_64Threads                             52.85ns   18.92M
-AtomicCoreCachedSharedPtrAcquire_64Threads                  56.58ns   17.67M
-ReadMostlySharedPtrAcquire_64Threads                        56.58ns   17.68M
-ReadMostlyWeakPtrLock_64Threads                             56.87ns   17.59M
+SharedPtrAcquire_64Threads                                   2.39us  418.35K
+WeakPtrLock_64Threads                                        4.21us  237.61K
+AtomicSharedPtrAcquire_64Threads                             8.63us  115.86K
+CoreCachedSharedPtrAquire_64Threads                         49.70ns   20.12M
+CoreCachedWeakPtrLock_64Threads                             74.74ns   13.38M
+AtomicCoreCachedSharedPtrAcquire_64Threads                  56.09ns   17.83M
+ReadMostlySharedPtrAcquire_64Threads                        49.22ns   20.32M
+ReadMostlyWeakPtrLock_64Threads                             49.16ns   20.34M
 ----------------------------------------------------------------------------
-SharedPtrSingleThreadReset                                  10.50ns   95.28M
-AtomicSharedPtrSingleThreadReset                            44.02ns   22.72M
-CoreCachedSharedPtrSingleThreadReset                         4.57us  218.87K
-AtomicCoreCachedSharedPtrSingleThreadReset                   5.22us  191.47K
+SharedPtrSingleThreadReset                                  10.45ns   95.70M
+AtomicSharedPtrSingleThreadReset                            42.83ns   23.35M
+CoreCachedSharedPtrSingleThreadReset                         2.51us  398.43K
+AtomicCoreCachedSharedPtrSingleThreadReset                   2.36us  423.31K
 ============================================================================
 #endif