Commit 65180b25 authored by Giuseppe Ottaviano, committed by Facebook GitHub Bot

Reduce memory usage of CoreCachedSharedPtr

Summary:
We only need as many slots as the number of L1 caches.

Also avoid allocating control blocks when the passed pointer has no managed object.

Reviewed By: philippv, luciang

Differential Revision: D29872059

fbshipit-source-id: 8c221b0523494c44a5c6828bafd26eeb00e573c4
parent 5fbc8492
......@@ -484,8 +484,9 @@ class CoreRawAllocator {
};
template <typename T, size_t Stripes>
CxxAllocatorAdaptor<T, typename CoreRawAllocator<Stripes>::Allocator>
getCoreAllocator(size_t stripe) {
FOLLY_EXPORT
CxxAllocatorAdaptor<T, typename CoreRawAllocator<Stripes>::Allocator>
getCoreAllocator(size_t stripe) {
// We cannot make sure that the allocator will be destroyed after
// all the objects allocated with it, so we leak it.
static Indestructible<CoreRawAllocator<Stripes>> allocator;
......
......@@ -17,18 +17,56 @@
#pragma once
#include <array>
#include <atomic>
#include <memory>
#include <folly/CppAttributes.h>
#include <folly/Portability.h>
#include <folly/Unit.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/container/Enumerate.h>
#include <folly/synchronization/Hazptr.h>
namespace folly {
// On mobile we do not expect high concurrency, and memory is more important, so
// use more conservative caching.
constexpr size_t kCoreCachedSharedPtrDefaultMaxSlots = kIsMobile ? 4 : 64;
// Backward-compatible alias for the pre-rename constant name, so any stale
// references to the old spelling keep compiling.
constexpr size_t kCoreCachedSharedPtrDefaultNumSlots =
    kCoreCachedSharedPtrDefaultMaxSlots;
namespace core_cached_shared_ptr_detail {
// Computes and caches the effective number of slots to use: at most kMaxSlots,
// clamped down to the number of L1 caches so that no memory is wasted on slots
// that could never reduce contention.
template <size_t kMaxSlots>
class SlotsConfig {
 public:
  // Lazily computes the slot count, exactly once per kMaxSlots instantiation
  // (guarded by the function-local static). FOLLY_EXPORT gives the local
  // static a single definition across shared objects. Safe to call repeatedly
  // and concurrently.
  FOLLY_EXPORT static void initialize() {
    FOLLY_MAYBE_UNUSED static const Unit _ = [] {
      // We need at most as many slots as the number of L1 caches, so we can
      // avoid wasting memory if more slots are requested.
      const auto l1Caches = CacheLocality::system().numCachesByLevel.front();
      // Clamp to [1, kMaxSlots]; the max() guards against a reported cache
      // count of 0.
      num_ = std::min(std::max<size_t>(1, l1Caches), kMaxSlots);
      return unit;
    }();
  }

  // Returns the current slot count. A relaxed load suffices: num_ only ever
  // transitions once, from its initial 1 to the computed value, and any
  // value it returns indexes a valid slot.
  static size_t num() { return num_.load(std::memory_order_relaxed); }

 private:
  static std::atomic<size_t> num_;
};

// Initialize with a valid num so that get() always returns a valid stripe, even
// if initialize() has not been called yet.
template <size_t kMaxSlots>
std::atomic<size_t> SlotsConfig<kMaxSlots>::num_{1};
// Check whether a shared_ptr is equivalent to a default-constructed one.
// Aliasing constructors make this subtle: a shared_ptr can store nullptr while
// still owning a managed object, or store a non-null pointer while owning
// nothing, so both the stored pointer and the ownership must be inspected.
template <class T>
bool isDefault(const std::shared_ptr<T>& p) {
  const bool storedIsNull = !p;
  const bool ownsNothing = p.use_count() == 0;
  return storedIsNull && ownsNothing;
}
} // namespace core_cached_shared_ptr_detail
/**
* This class creates core-local caches for a given shared_ptr, to
 *
*
* It has the same thread-safety guarantees as shared_ptr: it is safe
* to concurrently call get(), but reset()s must be synchronized with
reads and other reset()s.
*
* @author Giuseppe Ottaviano <ott@fb.com>
*/
template <class T, size_t kNumSlots = kCoreCachedSharedPtrDefaultNumSlots>
template <class T, size_t kMaxSlots = kCoreCachedSharedPtrDefaultMaxSlots>
class CoreCachedSharedPtr {
using SlotsConfig = core_cached_shared_ptr_detail::SlotsConfig<kMaxSlots>;
public:
CoreCachedSharedPtr() = default;
explicit CoreCachedSharedPtr(const std::shared_ptr<T>& p) { reset(p); }
void reset(const std::shared_ptr<T>& p = nullptr) {
SlotsConfig::initialize();
// Allocate each Holder in a different CoreRawAllocator stripe to
// prevent false sharing. Their control blocks will be adjacent
// thanks to allocate_shared().
for (auto slot : folly::enumerate(slots_)) {
auto alloc = getCoreAllocator<Holder, kNumSlots>(slot.index);
for (size_t i = 0; i < SlotsConfig::num(); ++i) {
// Try freeing the control block before allocating a new one.
slots_[i] = {};
if (!core_cached_shared_ptr_detail::isDefault(p)) {
auto alloc = getCoreAllocator<Holder, kMaxSlots>(i);
auto holder = std::allocate_shared<Holder>(alloc, p);
*slot = std::shared_ptr<T>(holder, p.get());
slots_[i] = std::shared_ptr<T>(holder, p.get());
}
}
}
std::shared_ptr<T> get() const {
return slots_[AccessSpreader<>::cachedCurrent(kNumSlots)];
return slots_[AccessSpreader<>::cachedCurrent(SlotsConfig::num())];
}
private:
......@@ -67,35 +112,38 @@ class CoreCachedSharedPtr {
template <class, size_t>
friend class CoreCachedWeakPtr;
std::array<std::shared_ptr<T>, kNumSlots> slots_;
std::array<std::shared_ptr<T>, kMaxSlots> slots_;
};
template <class T, size_t kNumSlots = kCoreCachedSharedPtrDefaultNumSlots>
template <class T, size_t kMaxSlots = kCoreCachedSharedPtrDefaultMaxSlots>
class CoreCachedWeakPtr {
using SlotsConfig = core_cached_shared_ptr_detail::SlotsConfig<kMaxSlots>;
public:
CoreCachedWeakPtr() = default;
explicit CoreCachedWeakPtr(const CoreCachedSharedPtr<T, kNumSlots>& p) {
explicit CoreCachedWeakPtr(const CoreCachedSharedPtr<T, kMaxSlots>& p) {
reset(p);
}
void reset() { *this = {}; }
void reset(const CoreCachedSharedPtr<T, kNumSlots>& p) {
for (auto slot : folly::enumerate(slots_)) {
*slot = p.slots_[slot.index];
void reset(const CoreCachedSharedPtr<T, kMaxSlots>& p) {
SlotsConfig::initialize();
for (size_t i = 0; i < SlotsConfig::num(); ++i) {
slots_[i] = p.slots_[i];
}
}
std::weak_ptr<T> get() const {
return slots_[AccessSpreader<>::cachedCurrent(kNumSlots)];
return slots_[AccessSpreader<>::cachedCurrent(SlotsConfig::num())];
}
// Faster than get().lock(), as it avoid one weak count cycle.
std::shared_ptr<T> lock() const {
return slots_[AccessSpreader<>::cachedCurrent(kNumSlots)].lock();
return slots_[AccessSpreader<>::cachedCurrent(SlotsConfig::num())].lock();
}
private:
std::array<std::weak_ptr<T>, kNumSlots> slots_;
std::array<std::weak_ptr<T>, kMaxSlots> slots_;
};
/**
 *
* get()s will never see a newer pointer on one core, and an older
* pointer on another after a subsequent thread migration.
*/
template <class T, size_t kNumSlots = kCoreCachedSharedPtrDefaultNumSlots>
template <class T, size_t kMaxSlots = kCoreCachedSharedPtrDefaultMaxSlots>
class AtomicCoreCachedSharedPtr {
using SlotsConfig = core_cached_shared_ptr_detail::SlotsConfig<kMaxSlots>;
public:
explicit AtomicCoreCachedSharedPtr(const std::shared_ptr<T>& p = nullptr) {
reset(p);
}
AtomicCoreCachedSharedPtr() = default;
explicit AtomicCoreCachedSharedPtr(const std::shared_ptr<T>& p) { reset(p); }
~AtomicCoreCachedSharedPtr() {
auto slots = slots_.load(std::memory_order_acquire);
// Delete of AtomicCoreCachedSharedPtr must be synchronized, no
// need for slots->retire().
if (slots) {
delete slots;
}
delete slots_.load(std::memory_order_acquire);
}
void reset(const std::shared_ptr<T>& p = nullptr) {
auto newslots = std::make_unique<Slots>();
SlotsConfig::initialize();
std::unique_ptr<Slots> newslots;
if (!core_cached_shared_ptr_detail::isDefault(p)) {
newslots = std::make_unique<Slots>();
// Allocate each Holder in a different CoreRawAllocator stripe to
// prevent false sharing. Their control blocks will be adjacent
// thanks to allocate_shared().
for (auto slot : folly::enumerate(newslots->slots_)) {
auto alloc = getCoreAllocator<Holder, kNumSlots>(slot.index);
for (size_t i = 0; i < SlotsConfig::num(); ++i) {
auto alloc = getCoreAllocator<Holder, kMaxSlots>(i);
auto holder = std::allocate_shared<Holder>(alloc, p);
*slot = std::shared_ptr<T>(holder, p.get());
newslots->slots[i] = std::shared_ptr<T>(holder, p.get());
}
}
auto oldslots = slots_.exchange(newslots.release());
if (oldslots) {
if (auto oldslots = slots_.exchange(newslots.release())) {
oldslots->retire();
}
}
std::shared_ptr<T> get() const {
folly::hazptr_local<1> hazptr;
auto slots = hazptr[0].protect(slots_);
if (!slots) {
if (auto slots = hazptr[0].protect(slots_)) {
return slots->slots[AccessSpreader<>::cachedCurrent(SlotsConfig::num())];
} else {
return nullptr;
}
return (slots->slots_)[AccessSpreader<>::cachedCurrent(kNumSlots)];
}
private:
using Holder = std::shared_ptr<T>;
struct Slots : folly::hazptr_obj_base<Slots> {
std::array<std::shared_ptr<T>, kNumSlots> slots_;
std::array<std::shared_ptr<T>, kMaxSlots> slots;
};
std::atomic<Slots*> slots_{nullptr};
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment