Commit 6eb46044 authored by Yedidya Feldblum, committed by Facebook GitHub Bot

use relaxed_atomic in CacheLocality

Summary: The stripe table, which is a table of atomics, is its own data and does not guard any associated data. It must technically be atomic to avoid a technical data race, but all accesses are relaxed.

Differential Revision: D31524230

fbshipit-source-id: 782b515f3845b4567ca258fd8d096f22248d60ff
parent 9031f3d9
...@@ -374,17 +374,14 @@ bool AccessSpreaderBase::initialize( ...@@ -374,17 +374,14 @@ bool AccessSpreaderBase::initialize(
assert(index < n); assert(index < n);
// as index goes from 0..n, post-transform value goes from // as index goes from 0..n, post-transform value goes from
// 0..numStripes // 0..numStripes
row[cpu].store( row[cpu] = static_cast<CompactStripe>((index * numStripes) / n);
static_cast<CompactStripe>((index * numStripes) / n),
std::memory_order_relaxed);
assert(row[cpu] < numStripes); assert(row[cpu] < numStripes);
} }
size_t filled = n; size_t filled = n;
while (filled < kMaxCpus) { while (filled < kMaxCpus) {
size_t len = std::min(filled, kMaxCpus - filled); size_t len = std::min(filled, kMaxCpus - filled);
for (size_t i = 0; i < len; ++i) { for (size_t i = 0; i < len; ++i) {
row[filled + i].store( row[filled + i] = row[i].load();
row[i].load(std::memory_order_relaxed), std::memory_order_relaxed);
} }
filled += len; filled += len;
} }
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <folly/detail/StaticSingletonManager.h> #include <folly/detail/StaticSingletonManager.h>
#include <folly/lang/Align.h> #include <folly/lang/Align.h>
#include <folly/lang/Exception.h> #include <folly/lang/Exception.h>
#include <folly/synchronization/RelaxedAtomic.h>
namespace folly { namespace folly {
...@@ -191,7 +192,8 @@ class AccessSpreaderBase { ...@@ -191,7 +192,8 @@ class AccessSpreaderBase {
kMaxCpus - 1 <= std::numeric_limits<CompactStripe>::max(), kMaxCpus - 1 <= std::numeric_limits<CompactStripe>::max(),
"stripeByCpu element type isn't wide enough"); "stripeByCpu element type isn't wide enough");
using CompactStripeTable = std::atomic<CompactStripe>[kMaxCpus + 1][kMaxCpus]; using CompactStripeTable =
relaxed_atomic<CompactStripe>[kMaxCpus + 1][kMaxCpus];
struct GlobalState { struct GlobalState {
/// For each level of splitting up to kMaxCpus, maps the cpu (mod /// For each level of splitting up to kMaxCpus, maps the cpu (mod
...@@ -281,8 +283,7 @@ struct AccessSpreader : private detail::AccessSpreaderBase { ...@@ -281,8 +283,7 @@ struct AccessSpreader : private detail::AccessSpreaderBase {
unsigned cpu; unsigned cpu;
s.getcpu.load(std::memory_order_relaxed)(&cpu, nullptr, nullptr); s.getcpu.load(std::memory_order_relaxed)(&cpu, nullptr, nullptr);
return s.table[std::min(size_t(kMaxCpus), numStripes)][cpu % kMaxCpus].load( return s.table[std::min(size_t(kMaxCpus), numStripes)][cpu % kMaxCpus];
std::memory_order_relaxed);
} }
/// Returns the stripe associated with the current CPU. The returned /// Returns the stripe associated with the current CPU. The returned
...@@ -296,8 +297,7 @@ struct AccessSpreader : private detail::AccessSpreaderBase { ...@@ -296,8 +297,7 @@ struct AccessSpreader : private detail::AccessSpreaderBase {
if (kIsMobile) { if (kIsMobile) {
return current(numStripes); return current(numStripes);
} }
return s.table[std::min(size_t(kMaxCpus), numStripes)][cpuCache().cpu(s)] return s.table[std::min(size_t(kMaxCpus), numStripes)][cpuCache().cpu(s)];
.load(std::memory_order_relaxed);
} }
/// Returns the maximum stripe value that can be returned under any /// Returns the maximum stripe value that can be returned under any
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment