A core-cached shared_ptr

Summary: This class serves a similar use case to `ReadMostlySharedPtr`, but with two differences: - It returns actual `shared_ptr`s, so it can be used as a drop-in replacement. - Its memory utilization is constant, independent of the number of threads that use it. Also, the code is much simpler. On the other hand, it is about 4x slower (but it still scales linearly in the number of concurrent threads acquiring the reference). Reviewed By: djwatson Differential Revision: D5104730 fbshipit-source-id: 8c18b635e0390394b06417b6df8b790e4bd2d90d

A core-cached shared_ptr
Summary: This class serves a similar use case to `ReadMostlySharedPtr`, but with two differences: - It returns actual `shared_ptr`s, so it can be used as a drop-in replacement. - Its memory utilization is constant, independent of the number of threads that use it. Also, the code is much simpler. On the other hand, it is about 4x slower (but it still scales linearly in the number of concurrent threads acquiring the reference). Reviewed By: djwatson Differential Revision: D5104730 fbshipit-source-id: 8c18b635e0390394b06417b6df8b790e4bd2d90d
b1eb5b68 · Giuseppe Ottaviano · Facebook Github Bot · 8bca2fb2 · b1eb5b68 · b1eb5b68
Commit b1eb5b68 authored May 23, 2017 by Giuseppe Ottaviano Committed by Facebook Github Bot May 23, 2017
5 changed files
--- a/folly/Enumerate.h
+++ b/folly/Enumerate.h
@@ -21,7 +21,7 @@

 #include <folly/portability/SysTypes.h>

-/*
+/**
 * Similar to Python's enumerate(), folly::enumerate() can be used to
 * iterate a range with a for-range loop, and it also allows to
 * retrieve the count of iterations so far.

--- a/folly/Makefile.am
+++ b/folly/Makefile.am
@@ -58,6 +58,7 @@ nobase_follyinclude_HEADERS = \
 	CppAttributes.h \
 	CpuId.h \
 	CPortability.h \
+	concurrency/CoreCachedSharedPtr.h \
 	detail/AtomicHashUtils.h \
 	detail/AtomicUnorderedMapUtils.h \
 	detail/AtomicUtils.h \

--- a/folly/concurrency/CoreCachedSharedPtr.h
+++ b/folly/concurrency/CoreCachedSharedPtr.h
+/*
+ * Copyright 2017 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <array>
+#include <memory>
+
+#include <folly/Enumerate.h>
+#include <folly/detail/CacheLocality.h>
+
+namespace folly {
+
+/**
+ * This class creates core-local caches for a given shared_ptr, to
+ * mitigate contention when acquiring/releasing it.
+ *
+ * It has the same thread-safety guarantees as shared_ptr: it is safe
+ * to concurrently call get(), but reset()s must be synchronized with
+ * reads and other resets().
+ *
+ * @author Giuseppe Ottaviano <ott@fb.com>
+ */
+template <class T, size_t kNumSlots = 64>
+class CoreCachedSharedPtr {
+ public:
+  explicit CoreCachedSharedPtr(const std::shared_ptr<T>& p = nullptr) {
+    reset(p);
+  }
+
+  void reset(const std::shared_ptr<T>& p = nullptr) {
+    for (auto& slot : slots_) {
+      auto holder = std::make_shared<Holder>(p);
+      slot = std::shared_ptr<T>(holder, p.get());
+    }
+  }
+
+  std::shared_ptr<T> get() const {
+    return slots_[detail::AccessSpreader<>::current(kNumSlots)];
+  }
+
+ private:
+  template <class, size_t>
+  friend class CoreCachedWeakPtr;
+
+  // Space the Holders by a cache line, so their control blocks (which
+  // are adjacent to the slots thanks to make_shared()) will also be
+  // spaced.
+  struct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING Holder {
+    explicit Holder(std::shared_ptr<T> p) : ptr(std::move(p)) {}
+    std::shared_ptr<T> ptr;
+  };
+
+  std::array<std::shared_ptr<T>, kNumSlots> slots_;
+};
+
+template <class T, size_t kNumSlots = 64>
+class CoreCachedWeakPtr {
+ public:
+  explicit CoreCachedWeakPtr(const CoreCachedSharedPtr<T, kNumSlots>& p) {
+    for (auto slot : folly::enumerate(slots_)) {
+      *slot = p.slots_[slot.index];
+    }
+  }
+
+  std::weak_ptr<T> get() const {
+    return slots_[detail::AccessSpreader<>::current(kNumSlots)];
+  }
+
+ private:
+  std::array<std::weak_ptr<T>, kNumSlots> slots_;
+};
+
+} // namespace
--- a/folly/concurrency/test/CoreCachedSharedPtrTest.cpp
+++ b/folly/concurrency/test/CoreCachedSharedPtrTest.cpp
+/*
+ * Copyright 2017 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <thread>
+#include <vector>
+
+#include <folly/Benchmark.h>
+#include <folly/Portability.h>
+#include <folly/concurrency/CoreCachedSharedPtr.h>
+#include <folly/portability/GTest.h>
+
+TEST(CoreCachedSharedPtr, Basic) {
+  auto p = std::make_shared<int>(1);
+  std::weak_ptr<int> wp(p);
+
+  folly::CoreCachedSharedPtr<int> cached(p);
+  folly::CoreCachedWeakPtr<int> wcached(cached);
+
+  std::shared_ptr<int> p2 = cached.get();
+  std::weak_ptr<int> wp2 = wcached.get();
+  ASSERT_TRUE(p2 != nullptr);
+  ASSERT_EQ(*p2, 1);
+  ASSERT_FALSE(wp2.expired());
+
+  p.reset();
+  cached.reset();
+  // p2 should survive.
+  ASSERT_FALSE(wp.expired());
+  // Here we don't know anything about wp2: could be expired even if
+  // there is a living reference to the main object.
+
+  p2.reset();
+  ASSERT_TRUE(wp.expired());
+  ASSERT_TRUE(wp2.expired());
+}
+
+namespace {
+
+template <class Operation>
+void parallelRun(Operation op, size_t numThreads, size_t iters) {
+  std::vector<std::thread> threads;
+
+  // Prevent the compiler from hoisting code out of the loop.
+  auto opNoinline = [&]() FOLLY_NOINLINE { op(); };
+
+  for (size_t t = 0; t < numThreads; ++t) {
+    threads.emplace_back([&] {
+      for (size_t i = 0; i < iters; ++i) {
+        opNoinline();
+      }
+    });
+  }
+
+  for (auto& t : threads)
+    t.join();
+}
+
+void benchmarkSharedPtrCopy(size_t numThreads, size_t iters) {
+  auto p = std::make_shared<int>(1);
+  parallelRun([&] { return p; }, numThreads, iters);
+}
+
+void benchmarkWeakPtrLock(size_t numThreads, size_t iters) {
+  auto p = std::make_shared<int>(1);
+  std::weak_ptr<int> wp = p;
+  parallelRun([&] { return wp.lock(); }, numThreads, iters);
+}
+
+void benchmarkCoreCachedSharedPtrGet(size_t numThreads, size_t iters) {
+  folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
+  parallelRun([&] { return p.get(); }, numThreads, iters);
+}
+
+void benchmarkCoreCachedWeakPtrLock(size_t numThreads, size_t iters) {
+  folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
+  folly::CoreCachedWeakPtr<int> wp(p);
+  parallelRun([&] { return wp.get().lock(); }, numThreads, iters);
+}
+
+} // namespace
+
+BENCHMARK(SharedPtrSingleThread, n) {
+  benchmarkSharedPtrCopy(1, n);
+}
+BENCHMARK(WeakPtrSingleThread, n) {
+  benchmarkWeakPtrLock(1, n);
+}
+BENCHMARK(CoreCachedSharedPtrSingleThread, n) {
+  benchmarkCoreCachedSharedPtrGet(1, n);
+}
+BENCHMARK(CoreCachedWeakPtrSingleThread, n) {
+  benchmarkCoreCachedWeakPtrLock(1, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+BENCHMARK(SharedPtr4Threads, n) {
+  benchmarkSharedPtrCopy(4, n);
+}
+BENCHMARK(WeakPtr4Threads, n) {
+  benchmarkWeakPtrLock(4, n);
+}
+BENCHMARK(CoreCachedSharedPtr4Threads, n) {
+  benchmarkCoreCachedSharedPtrGet(4, n);
+}
+BENCHMARK(CoreCachedWeakPtr4Threads, n) {
+  benchmarkCoreCachedWeakPtrLock(4, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+BENCHMARK(SharedPtr16Threads, n) {
+  benchmarkSharedPtrCopy(16, n);
+}
+BENCHMARK(WeakPtr16Threads, n) {
+  benchmarkWeakPtrLock(16, n);
+}
+BENCHMARK(CoreCachedSharedPtr16Threads, n) {
+  benchmarkCoreCachedSharedPtrGet(16, n);
+}
+BENCHMARK(CoreCachedWeakPtr16Threads, n) {
+  benchmarkCoreCachedWeakPtrLock(16, n);
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  auto ret = RUN_ALL_TESTS();
+  if (ret == 0 && FLAGS_benchmark) {
+    folly::runBenchmarks();
+  }
+
+  return ret;
+}
--- a/folly/experimental/test/ReadMostlySharedPtrBenchmark.cpp
+++ b/folly/experimental/test/ReadMostlySharedPtrBenchmark.cpp
@@ -36,9 +36,11 @@ void benchmark(size_t n) {
  for (size_t t = 0; t < threadCount; ++t) {
    ts.emplace_back([&]() {
        WeakPtr<int> weakPtr(mainPtr);
+        // Prevent the compiler from hoisting code out of the loop.
+        auto op = [&]() FOLLY_NOINLINE { weakPtr.lock(); };

        for (size_t i = 0; i < n; ++i) {
-          weakPtr.lock();
+          op();
        }
      });
  }