Add CachelinePadded<T> to folly.

Summary: This class allows easy insertion of padding bytes after an object to ensure that two cacheline-padded elements of an array don't engage in false sharing. Reviewed By: yfeldblum Differential Revision: D3485144 fbshipit-source-id: a3ece1e34566b20f94ff9f66532b2386ab19a9b1

Add CachelinePadded<T> to folly.
Summary: This class allows easy insertion of padding bytes after an object to ensure that two cacheline-padded elements of an array don't engage in false sharing. Reviewed By: yfeldblum Differential Revision: D3485144 fbshipit-source-id: a3ece1e34566b20f94ff9f66532b2386ab19a9b1
e0ac799a · David Goldblatt · Facebook Github Bot 6 · a4430933 · e0ac799a · e0ac799a
Commit e0ac799a authored Aug 26, 2016 by David Goldblatt Committed by Facebook Github Bot 6 Aug 26, 2016
4 changed files
--- a/folly/CachelinePadded.h
+++ b/folly/CachelinePadded.h
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <folly/detail/CachelinePaddedImpl.h>
+
+namespace folly {
+
+/**
+ * Holds a type T, in addition to enough padding to round the size up to the
+ * next multiple of the false sharing range used by folly.
+ *
+ * If T is standard-layout, then casting a T* you get from this class to a
+ * CachelinePadded<T>* is safe.
+ *
+ * This class handles padding, but imperfectly handles alignment. (Note that
+ * alignment matters for false-sharing: imagine a cacheline size of 64, and two
+ * adjacent 64-byte objects, with the first starting at an offset of 32. The
+ * last 32 bytes of the first object share a cacheline with the first 32 bytes
+ * of the second.). We alignas this class to be at least cacheline-sized, but
+ * it's implementation-defined what that means (since a cacheline is almost
+ * certainly larger than the maximum natural alignment). The following should be
+ * true for recent compilers on common architectures:
+ *
+ * For heap objects, alignment needs to be handled at the allocator level, such
+ * as with posix_memalign (this isn't necessary with jemalloc, which aligns
+ * objects that are a multiple of cacheline size to a cacheline).
+ *
+ * For static and stack objects, the alignment should be obeyed, and no specific
+ * intervention is necessary.
+ */
+template <typename T>
+class CachelinePadded {
+ public:
+  template <typename... Args>
+  explicit CachelinePadded(Args&&... args)
+      : impl_(std::forward<Args>(args)...) {}
+
+  CachelinePadded() {}
+
+  T* get() {
+    return &impl_.item;
+  }
+
+  const T* get() const {
+    return &impl_.item;
+  }
+
+  T* operator->() {
+    return get();
+  }
+
+  const T* operator->() const {
+    return get();
+  }
+
+  T& operator*() {
+    return *get();
+  }
+
+  const T& operator*() const {
+    return *get();
+  }
+
+ private:
+  detail::CachelinePaddedImpl<T> impl_;
+};
+}
--- a/folly/Makefile.am
+++ b/folly/Makefile.am
@@ -39,6 +39,7 @@ nobase_follyinclude_HEADERS = \
 	Baton.h \
 	Benchmark.h \
 	Bits.h \
+	CachelinePadded.h \
 	CallOnce.h \
 	Checksum.h \
 	ClockGettimeWrappers.h \
@@ -54,6 +55,7 @@ nobase_follyinclude_HEADERS = \
 	detail/BitIteratorDetail.h \
 	detail/BitsDetail.h \
 	detail/CacheLocality.h \
+	detail/CachelinePaddedImpl.h \
 	detail/ChecksumDetail.h \
 	detail/DiscriminatedPtrDetail.h \
 	detail/ExceptionWrapper.h \

--- a/folly/detail/CachelinePaddedImpl.h
+++ b/folly/detail/CachelinePaddedImpl.h
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <folly/detail/CacheLocality.h>
+
+namespace folly {
+
+template <typename T>
+class CachelinePadded;
+
+namespace detail {
+
+template <
+    typename T,
+    bool needsPadding = (sizeof(T) % CacheLocality::kFalseSharingRange != 0)>
+struct CachelinePaddedImpl;
+
+// We need alignas(T) alignas(kFalseSharingRange) for the case where alignof(T)
+// > alignof(kFalseSharingRange).
+template <typename T>
+struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
+    CachelinePaddedImpl<T, /* needsPadding = */ false> {
+  template <typename... Args>
+  explicit CachelinePaddedImpl(Args&&... args)
+      : item(std::forward<Args>(args)...) {}
+  T item;
+};
+
+template <typename T>
+struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
+    CachelinePaddedImpl<T, /* needsPadding = */ true> {
+  template <typename... Args>
+  explicit CachelinePaddedImpl(Args&&... args)
+      : item(std::forward<Args>(args)...) {}
+
+  T item;
+  char padding
+      [CacheLocality::kFalseSharingRange -
+       sizeof(T) % CacheLocality::kFalseSharingRange];
+};
+} // namespace detail
+} // namespace folly
--- a/folly/test/CachelinePaddedTest.cpp
+++ b/folly/test/CachelinePaddedTest.cpp
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <folly/CachelinePadded.h>
+
+#include <type_traits>
+
+#include <gtest/gtest.h>
+
+using folly::CachelinePadded;
+
+static_assert(
+    std::is_standard_layout<CachelinePadded<int>>::value,
+    "CachelinePadded<T> must be standard-layout if T is.");
+
+const int kCachelineSize = folly::detail::CacheLocality::kFalseSharingRange;
+
+template <int dataSize>
+struct SizedData {
+  SizedData() {
+    for (unsigned i = 0; i < dataSize; ++i) {
+      data[i] = i;
+    }
+  }
+
+  void doModifications() {
+    for (unsigned i = 0; i < dataSize; ++i) {
+      EXPECT_EQ(static_cast<unsigned char>(i), data[i]);
+      ++data[i];
+    }
+  }
+
+  ~SizedData() {
+    for (unsigned i = 0; i < dataSize; ++i) {
+      EXPECT_EQ(static_cast<unsigned char>(i + 1), data[i]);
+    }
+  }
+
+  unsigned char data[dataSize];
+};
+
+using ExactlyCachelineSized = SizedData<kCachelineSize>;
+using DoubleCachelineSized = SizedData<2 * kCachelineSize>;
+using BelowCachelineSized = SizedData<kCachelineSize / 2>;
+using AboveCachelineSized = SizedData<kCachelineSize + kCachelineSize / 2>;
+
+TEST(CachelinePadded, Exact) {
+  EXPECT_EQ(kCachelineSize, sizeof(CachelinePadded<ExactlyCachelineSized>));
+  CachelinePadded<ExactlyCachelineSized> item;
+  item.get()->doModifications();
+  EXPECT_TRUE(
+      reinterpret_cast<CachelinePadded<ExactlyCachelineSized>*>(item.get()) ==
+      &item);
+}
+
+TEST(CachelinePadded, Double) {
+  EXPECT_EQ(2 * kCachelineSize, sizeof(CachelinePadded<DoubleCachelineSized>));
+  CachelinePadded<DoubleCachelineSized> item;
+  item.get()->doModifications();
+  EXPECT_TRUE(
+      reinterpret_cast<CachelinePadded<DoubleCachelineSized>*>(item.get()) ==
+      &item);
+}
+
+TEST(CachelinePadded, Below) {
+  EXPECT_EQ(kCachelineSize, sizeof(CachelinePadded<BelowCachelineSized>));
+  CachelinePadded<BelowCachelineSized> item;
+  item.get()->doModifications();
+  EXPECT_TRUE(
+      reinterpret_cast<CachelinePadded<BelowCachelineSized>*>(item.get()) ==
+      &item);
+}
+
+TEST(CachelinePadded, Above) {
+  EXPECT_EQ(2 * kCachelineSize, sizeof(CachelinePadded<AboveCachelineSized>));
+  CachelinePadded<AboveCachelineSized> item;
+  item.get()->doModifications();
+  EXPECT_TRUE(
+      reinterpret_cast<CachelinePadded<AboveCachelineSized>*>(item.get()) ==
+      &item);
+}
+
+TEST(CachelinePadded, CanBeCastedBack) {
+  CachelinePadded<int> padded;
+  CachelinePadded<int>* ptr =
+      reinterpret_cast<CachelinePadded<int>*>(padded.get());
+  EXPECT_EQ(&padded, ptr);
+}
+
+TEST(CachelinePadded, PtrOperator) {
+  CachelinePadded<int> padded;
+  EXPECT_TRUE(padded.get() == padded.operator->());
+  EXPECT_TRUE(&*padded == padded.get());
+  const CachelinePadded<int> constPadded;
+  EXPECT_TRUE(constPadded.get() == constPadded.operator->());
+  EXPECT_TRUE(constPadded.get() == &*constPadded.get());
+}
+
+TEST(CachelinePadded, PropagatesConstness) {
+  struct OverloadedOnConst {
+    void assign(int* dst) {
+      *dst = 31415;
+    }
+    void assign(int* dst) const {
+      *dst = 271828;
+    }
+  };
+
+  CachelinePadded<OverloadedOnConst> padded;
+
+  int i = 0;
+  padded->assign(&i);
+  EXPECT_EQ(31415, i);
+
+  const CachelinePadded<OverloadedOnConst> constPadded;
+  constPadded->assign(&i);
+  EXPECT_EQ(271828, i);
+}
+
+TEST(CachelinePadded, ConstructsAndDestructs) {
+  enum LifetimeStatus {
+    kNone,
+    kConstructed,
+    kDestroyed,
+  };
+  struct WriteOnLifetimeOp {
+    explicit WriteOnLifetimeOp(LifetimeStatus* dst) : dst_(dst) {
+      *dst = kConstructed;
+    }
+    ~WriteOnLifetimeOp() {
+      *dst_ = kDestroyed;
+    }
+    LifetimeStatus* dst_;
+  };
+  LifetimeStatus status = kNone;
+  CachelinePadded<WriteOnLifetimeOp>* ptr =
+      new CachelinePadded<WriteOnLifetimeOp>(&status);
+  EXPECT_EQ(kConstructed, status);
+  delete ptr;
+  EXPECT_EQ(kDestroyed, status);
+}
+
+TEST(CachelinePadded, ConstructsAndDestructsArrays) {
+  static thread_local int numConstructions;
+  static thread_local int numDestructions;
+  numConstructions = 0;
+  numDestructions = 0;
+  struct LifetimeCountingClass {
+    LifetimeCountingClass() {
+      ++numConstructions;
+    }
+    ~LifetimeCountingClass() {
+      ++numDestructions;
+    }
+  };
+  const static int kNumItems = 123;
+  CachelinePadded<LifetimeCountingClass>* ptr =
+      new CachelinePadded<LifetimeCountingClass>[kNumItems];
+  EXPECT_EQ(kNumItems, numConstructions);
+  delete[] ptr;
+  EXPECT_EQ(kNumItems, numDestructions);
+}
+
+TEST(CachelinePadded, ForwardsCorrectly) {
+  struct RvalueOverloadedConstructor {
+    RvalueOverloadedConstructor(int* dst, int& /* ignored */) {
+      *dst = 0;
+    }
+    RvalueOverloadedConstructor(int* dst, int&& /* ignored */) {
+      *dst = 1;
+    }
+  };
+  int shouldBeZero = 12345;
+  int shouldBeOne = 67890;
+  {
+    int ignored = 42;
+    CachelinePadded<RvalueOverloadedConstructor> padded1(
+        &shouldBeZero, ignored);
+    CachelinePadded<RvalueOverloadedConstructor> padded2(
+        &shouldBeOne, static_cast<int&&>(ignored));
+  }
+  EXPECT_EQ(0, shouldBeZero);
+  EXPECT_EQ(1, shouldBeOne);
+}