Commit 05ce5228 authored by Giuseppe Ottaviano, committed by Facebook Github Bot

Move CacheLocality out of detail/ and into concurrency/

Summary: There's no reason these utilities should only be used by folly.

Reviewed By: mzlee

Differential Revision: D5317894

fbshipit-source-id: 5a9bdf4c5efaa5bcbe78e6723a03a468f2fe5e32
parent 04cf6b8f
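For orientation (not part of the commit), here is a minimal sketch of what a call site looks like after this move: the header is now `folly/concurrency/CacheLocality.h`, and `AccessSpreader` / `CacheLocality` live directly in namespace `folly` rather than `folly::detail`. The `StripedCounter` type and the stripe count of 8 below are illustrative inventions, not folly APIs.

```cpp
#include <folly/concurrency/CacheLocality.h> // was <folly/detail/CacheLocality.h>

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>

// Illustrative striped counter: each thread bumps the counter in the stripe
// that AccessSpreader maps its current CPU to, so hot writes stay on cache
// lines that are mostly local to one core.
struct StripedCounter {
  static constexpr std::size_t kStripes = 8; // arbitrary example value

  void bump() {
    // Now folly::AccessSpreader, not folly::detail::AccessSpreader.
    auto stripe = folly::AccessSpreader<>::current(kStripes);
    counts_[stripe].count.fetch_add(1, std::memory_order_relaxed);
  }

  uint64_t read() const {
    uint64_t sum = 0;
    for (const auto& c : counts_) {
      sum += c.count.load(std::memory_order_relaxed);
    }
    return sum;
  }

 private:
  // kFalseSharingRange is likewise folly::CacheLocality::kFalseSharingRange
  // after this change; pad each stripe out to its own false-sharing range.
  struct alignas(folly::CacheLocality::kFalseSharingRange) PaddedCount {
    std::atomic<uint64_t> count{0};
  };
  std::array<PaddedCount, kStripes> counts_{};
};
```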
@@ -292,6 +292,8 @@ if (BUILD_TESTS)
apply_folly_compile_options_to_target(folly_test_support)
folly_define_tests(
DIRECTORY concurrency/
TEST cache_locality_test SOURCES CacheLocalityTest.cpp
DIRECTORY experimental/test/
TEST autotimer_test SOURCES AutoTimerTest.cpp
TEST bits_test_2 SOURCES BitsTest.cpp
@@ -467,7 +469,6 @@ if (BUILD_TESTS)
TEST baton_test SOURCES BatonTest.cpp
TEST bit_iterator_test SOURCES BitIteratorTest.cpp
TEST bits_test SOURCES BitsTest.cpp
TEST cache_locality_test SOURCES CacheLocalityTest.cpp
TEST cacheline_padded_test SOURCES CachelinePaddedTest.cpp
TEST call_once_test SOURCES CallOnceTest.cpp
TEST checksum_test SOURCES ChecksumTest.cpp
@@ -16,14 +16,16 @@
#pragma once
#include <type_traits>
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <type_traits>
#include <boost/noncopyable.hpp>
#include <folly/AtomicStruct.h>
#include <folly/Portability.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>
@@ -497,7 +499,7 @@ struct IndexedMemPool : boost::noncopyable {
}
AtomicStruct<TaggedPtr,Atom>& localHead() {
auto stripe = detail::AccessSpreader<Atom>::current(NumLocalLists);
auto stripe = AccessSpreader<Atom>::current(NumLocalLists);
return local_[stripe].head;
}
@@ -27,7 +27,7 @@
#include <folly/Baton.h>
#include <folly/IndexedMemPool.h>
#include <folly/Likely.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
namespace folly {
@@ -515,9 +515,7 @@ struct LifoSemBase {
FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
folly::AtomicStruct<LifoSemHead,Atom> head_;
char padding_[folly::detail::CacheLocality::kFalseSharingRange -
sizeof(LifoSemHead)];
char padding_[folly::CacheLocality::kFalseSharingRange - sizeof(LifoSemHead)];
static LifoSemNode<Handoff, Atom>& idxToNode(uint32_t idx) {
auto raw = &LifoSemRawNode<Atom>::pool()[idx];
@@ -25,7 +25,7 @@
#include <type_traits>
#include <folly/Traits.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/detail/TurnSequencer.h>
#include <folly/portability/Unistd.h>
@@ -647,11 +647,11 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
}
// ideally this would be a static assert, but g++ doesn't allow it
assert(alignof(MPMCQueue<T,Atom>)
>= detail::CacheLocality::kFalseSharingRange);
assert(static_cast<uint8_t*>(static_cast<void*>(&popTicket_))
- static_cast<uint8_t*>(static_cast<void*>(&pushTicket_))
>= detail::CacheLocality::kFalseSharingRange);
assert(alignof(MPMCQueue<T, Atom>) >= CacheLocality::kFalseSharingRange);
assert(
static_cast<uint8_t*>(static_cast<void*>(&popTicket_)) -
static_cast<uint8_t*>(static_cast<void*>(&pushTicket_)) >=
CacheLocality::kFalseSharingRange);
}
/// A default-constructed queue is useful because a usable (non-zero
@@ -971,8 +971,7 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
/// To avoid false sharing in slots_ with neighboring memory
/// allocations, we pad it with this many SingleElementQueue-s at
/// each end
kSlotPadding = (detail::CacheLocality::kFalseSharingRange - 1)
/ sizeof(Slot) + 1
kSlotPadding = (CacheLocality::kFalseSharingRange - 1) / sizeof(Slot) + 1
};
/// The maximum number of items in the queue at once
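As a worked example of the kSlotPadding expression above (not part of the diff), assume kFalseSharingRange is 128 bytes (folly's usual value) and a hypothetical Slot is 40 bytes: (128 - 1) / 40 + 1 = 4, i.e. the expression rounds 128 / 40 up, so four dummy slots at each end cover a full false-sharing range.

```cpp
#include <cstddef>

// Hypothetical sizes, only to make the rounding-up arithmetic concrete.
constexpr std::size_t kFalseSharingRange = 128; // assumed padding range
constexpr std::size_t kSlotSize = 40;           // assumed sizeof(Slot)
static_assert(
    (kFalseSharingRange - 1) / kSlotSize + 1 == 4,
    "padding rounds 128/40 up to 4 slots");
```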
@@ -1024,8 +1023,7 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
/// Alignment doesn't prevent false sharing at the end of the struct,
/// so fill out the last cache line
char padding_[detail::CacheLocality::kFalseSharingRange -
sizeof(Atom<uint32_t>)];
char padding_[CacheLocality::kFalseSharingRange - sizeof(Atom<uint32_t>)];
/// We assign tickets in increasing order, but we don't want to
/// access neighboring elements of slots_ because that will lead to
@@ -56,12 +56,12 @@ nobase_follyinclude_HEADERS = \
CppAttributes.h \
CpuId.h \
CPortability.h \
concurrency/CacheLocality.h \
concurrency/CoreCachedSharedPtr.h \
detail/AtomicHashUtils.h \
detail/AtomicUnorderedMapUtils.h \
detail/AtomicUtils.h \
detail/BitIteratorDetail.h \
detail/CacheLocality.h \
detail/CachelinePaddedImpl.h \
detail/ChecksumDetail.h \
detail/DiscriminatedPtrDetail.h \
@@ -459,7 +459,7 @@ libfolly_la_SOURCES = \
Assume.cpp \
Checksum.cpp \
ClockGettimeWrappers.cpp \
detail/CacheLocality.cpp \
concurrency/CacheLocality.cpp \
detail/IPAddress.cpp \
dynamic.cpp \
ExceptionWrapper.cpp \
@@ -27,7 +27,7 @@
#include <type_traits>
#include <utility>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
namespace folly {
@@ -168,14 +168,14 @@ struct ProducerConsumerQueue {
}
private:
char pad0_[detail::CacheLocality::kFalseSharingRange];
char pad0_[CacheLocality::kFalseSharingRange];
const uint32_t size_;
T* const records_;
FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> readIndex_;
FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> writeIndex_;
char pad1_[detail::CacheLocality::kFalseSharingRange - sizeof(writeIndex_)];
char pad1_[CacheLocality::kFalseSharingRange - sizeof(writeIndex_)];
};
}
@@ -19,11 +19,13 @@
#pragma once
#include <stdint.h>
#include <atomic>
#include <thread>
#include <type_traits>
#include <folly/Likely.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/detail/Futex.h>
#include <folly/portability/Asm.h>
#include <folly/portability/SysResource.h>
@@ -1417,8 +1419,7 @@ bool SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
// starting point for our empty-slot search, can change after
// calling waitForZeroBits
uint32_t bestSlot =
(uint32_t)folly::detail::AccessSpreader<Atom>::current(
kMaxDeferredReaders);
(uint32_t)folly::AccessSpreader<Atom>::current(kMaxDeferredReaders);
// deferred readers are already enabled, or it is time to
// enable them if we can find a slot
@@ -21,7 +21,7 @@
#include <chrono>
#include <folly/Likely.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
namespace folly {
@@ -14,7 +14,7 @@
* limitations under the License.
*/
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#ifndef _MSC_VER
#define _GNU_SOURCE 1 // for RTLD_NOLOAD
@@ -29,7 +29,6 @@
#include <folly/ScopeGuard.h>
namespace folly {
namespace detail {
///////////// CacheLocality
@@ -244,13 +243,13 @@ SimpleAllocator::SimpleAllocator(size_t allocSize, size_t sz)
SimpleAllocator::~SimpleAllocator() {
std::lock_guard<std::mutex> g(m_);
for (auto& block : blocks_) {
aligned_free(block);
detail::aligned_free(block);
}
}
void* SimpleAllocator::allocateHard() {
// Allocate a new slab.
mem_ = static_cast<uint8_t*>(aligned_malloc(allocSize_, allocSize_));
mem_ = static_cast<uint8_t*>(detail::aligned_malloc(allocSize_, allocSize_));
if (!mem_) {
std::__throw_bad_alloc();
}
@@ -271,5 +270,4 @@ void* SimpleAllocator::allocateHard() {
return mem;
}
} // namespace detail
} // namespace folly
@@ -38,7 +38,6 @@
#include <folly/portability/Memory.h>
namespace folly {
namespace detail {
// This file contains several classes that might be useful if you are
// trying to dynamically optimize cache locality: CacheLocality reads
@@ -458,7 +457,8 @@ class CoreAllocator {
// Align to a cacheline
size = size + (CacheLocality::kFalseSharingRange - 1);
size &= ~size_t(CacheLocality::kFalseSharingRange - 1);
void* mem = aligned_malloc(size, CacheLocality::kFalseSharingRange);
void* mem =
detail::aligned_malloc(size, CacheLocality::kFalseSharingRange);
if (!mem) {
std::__throw_bad_alloc();
}
@@ -478,7 +478,7 @@ class CoreAllocator {
auto allocator = *static_cast<SimpleAllocator**>(addr);
allocator->deallocate(mem);
} else {
aligned_free(mem);
detail::aligned_free(mem);
}
}
};
@@ -507,5 +507,4 @@ StlAllocator<typename CoreAllocator<Stripes>::Allocator, T> getCoreAllocatorStl(
return StlAllocator<typename CoreAllocator<Stripes>::Allocator, T>(alloc);
}
} // namespace detail
} // namespace folly
@@ -20,7 +20,7 @@
#include <memory>
#include <folly/Enumerate.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
namespace folly {
@@ -46,14 +46,14 @@ class CoreCachedSharedPtr {
// prevent false sharing. Their control blocks will be adjacent
// thanks to allocate_shared().
for (auto slot : folly::enumerate(slots_)) {
auto alloc = detail::getCoreAllocatorStl<Holder, kNumSlots>(slot.index);
auto alloc = getCoreAllocatorStl<Holder, kNumSlots>(slot.index);
auto holder = std::allocate_shared<Holder>(alloc, p);
*slot = std::shared_ptr<T>(holder, p.get());
}
}
std::shared_ptr<T> get() const {
return slots_[detail::AccessSpreader<>::current(kNumSlots)];
return slots_[AccessSpreader<>::current(kNumSlots)];
}
private:
@@ -75,7 +75,7 @@ class CoreCachedWeakPtr {
}
std::weak_ptr<T> get() const {
return slots_[detail::AccessSpreader<>::current(kNumSlots)];
return slots_[AccessSpreader<>::current(kNumSlots)];
}
private:
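For readers unfamiliar with the class touched here, a hedged usage sketch of CoreCachedSharedPtr (not part of the diff). It assumes the constructor accepts a std::shared_ptr, as the constructor body shown above suggests; get() returns a copy of the slot that AccessSpreader picks for the calling core, so refcount traffic mostly stays on core-local cache lines. The Config type and gConfig name are invented for illustration.

```cpp
#include <folly/concurrency/CoreCachedSharedPtr.h>

#include <memory>

struct Config {
  int verbosity = 0;
};

// One writer publishes a Config; many reader threads grab core-local copies.
folly::CoreCachedSharedPtr<Config> gConfig{std::make_shared<Config>()};

int readVerbosity() {
  // Copying the per-core slot bumps a refcount in a control block that was
  // allocated for this core, rather than one shared by every reader.
  std::shared_ptr<Config> local = gConfig.get();
  return local->verbosity;
}
```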
@@ -14,7 +14,7 @@
* limitations under the License.
*/
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#include <memory>
#include <thread>
@@ -24,7 +24,7 @@
#include <folly/Benchmark.h>
using namespace folly::detail;
using namespace folly;
#define DECLARE_SPREADER_TAG(tag, locality, func) \
namespace { \
@@ -32,7 +32,6 @@ using namespace folly::detail;
struct tag {}; \
} \
namespace folly { \
namespace detail { \
template <> \
const CacheLocality& CacheLocality::system<tag>() { \
static auto* inst = new CacheLocality(locality); \
@@ -42,16 +41,16 @@ using namespace folly::detail;
Getcpu::Func AccessSpreader<tag>::pickGetcpuFunc() { \
return func; \
} \
} \
}
DECLARE_SPREADER_TAG(
ThreadLocalTag,
CacheLocality::system<>(),
folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu)
DECLARE_SPREADER_TAG(PthreadSelfTag,
folly::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu)
DECLARE_SPREADER_TAG(
PthreadSelfTag,
CacheLocality::system<>(),
folly::detail::FallbackGetcpu<HashingThreadId>::getcpu)
folly::FallbackGetcpu<HashingThreadId>::getcpu)
BENCHMARK(AccessSpreaderUse, iters) {
for (unsigned long i = 0; i < iters; ++i) {
@@ -14,7 +14,7 @@
* limitations under the License.
*/
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/portability/GTest.h>
@@ -24,7 +24,7 @@
#include <unordered_map>
#include <glog/logging.h>
using namespace folly::detail;
using namespace folly;
/// This is the relevant nodes from a production box's sysfs tree. If you
/// think this map is ugly you should see the version of this test that
@@ -363,13 +363,12 @@ TEST(Getcpu, VdsoGetcpu) {
#ifdef FOLLY_TLS
TEST(ThreadId, SimpleTls) {
unsigned cpu = 0;
auto rv =
folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
auto rv = folly::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
&cpu, nullptr, nullptr);
EXPECT_EQ(rv, 0);
EXPECT_TRUE(cpu > 0);
unsigned again;
folly::detail::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
folly::FallbackGetcpu<SequentialThreadId<std::atomic>>::getcpu(
&again, nullptr, nullptr);
EXPECT_EQ(cpu, again);
}
@@ -377,13 +376,12 @@ TEST(ThreadId, SimpleTls) {
TEST(ThreadId, SimplePthread) {
unsigned cpu = 0;
auto rv = folly::detail::FallbackGetcpu<HashingThreadId>::getcpu(
&cpu, nullptr, nullptr);
auto rv =
folly::FallbackGetcpu<HashingThreadId>::getcpu(&cpu, nullptr, nullptr);
EXPECT_EQ(rv, 0);
EXPECT_TRUE(cpu > 0);
unsigned again;
folly::detail::FallbackGetcpu<HashingThreadId>::getcpu(
&again, nullptr, nullptr);
folly::FallbackGetcpu<HashingThreadId>::getcpu(&again, nullptr, nullptr);
EXPECT_EQ(cpu, again);
}
@@ -414,7 +412,6 @@ TEST(AccessSpreader, Simple) {
struct tag {}; \
} \
namespace folly { \
namespace detail { \
template <> \
const CacheLocality& CacheLocality::system<tag>() { \
static auto* inst = new CacheLocality(locality); \
......@@ -424,7 +421,6 @@ TEST(AccessSpreader, Simple) {
Getcpu::Func AccessSpreader<tag>::pickGetcpuFunc() { \
return func; \
} \
} \
}
DECLARE_SPREADER_TAG(ManualTag, CacheLocality::uniform(16), testingGetcpu)
@@ -16,7 +16,7 @@
#pragma once
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
namespace folly {
@@ -33,7 +33,7 @@ struct CachelinePaddedImpl;
// We need alignas(T) alignas(kFalseSharingRange) for the case where alignof(T)
// > alignof(kFalseSharingRange).
template <typename T>
struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
struct alignas(T) alignas(CacheLocality::kFalseSharingRange)
CachelinePaddedImpl<T, /* needsPadding = */ false> {
template <typename... Args>
explicit CachelinePaddedImpl(Args&&... args)
@@ -42,7 +42,7 @@ struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
};
template <typename T>
struct alignas(T) alignas(detail::CacheLocality::kFalseSharingRange)
struct alignas(T) alignas(CacheLocality::kFalseSharingRange)
CachelinePaddedImpl<T, /* needsPadding = */ true> {
template <typename... Args>
explicit CachelinePaddedImpl(Args&&... args)
@@ -21,7 +21,7 @@
#include <folly/Malloc.h>
#include <folly/Portability.h>
#include <folly/ScopeGuard.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/portability/PThread.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>
@@ -20,7 +20,7 @@
#include <folly/Function.h>
#include <folly/IndexedMemPool.h>
#include <folly/Portability.h>
#include <folly/detail/CacheLocality.h>
#include <folly/concurrency/CacheLocality.h>
#include <atomic>
#include <cassert>
@@ -26,7 +26,7 @@ static_assert(
std::is_standard_layout<CachelinePadded<int>>::value,
"CachelinePadded<T> must be standard-layout if T is.");
const int kCachelineSize = folly::detail::CacheLocality::kFalseSharingRange;
const int kCachelineSize = folly::CacheLocality::kFalseSharingRange;
template <int dataSize>
struct SizedData {
@@ -382,6 +382,7 @@ int Futex<DeterministicAtomic>::futexWake(int count, uint32_t wakeMask) {
DeterministicSchedule::afterSharedAccess();
return rv;
}
}
template <>
CacheLocality const& CacheLocality::system<test::DeterministicAtomic>() {
@@ -391,7 +392,6 @@ CacheLocality const& CacheLocality::system<test::DeterministicAtomic>() {
template <>
Getcpu::Func AccessSpreader<test::DeterministicAtomic>::pickGetcpuFunc() {
return &DeterministicSchedule::getcpu;
}
return &detail::DeterministicSchedule::getcpu;
}
}
@@ -28,8 +28,8 @@
#include <vector>
#include <folly/ScopeGuard.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/detail/AtomicUtils.h>
#include <folly/detail/CacheLocality.h>
#include <folly/detail/Futex.h>
#include <folly/portability/Semaphore.h>
@@ -499,8 +499,9 @@ FutexResult Futex<test::DeterministicAtomic>::futexWaitImpl(
std::chrono::time_point<std::chrono::system_clock>* absSystemTime,
std::chrono::time_point<std::chrono::steady_clock>* absSteadyTime,
uint32_t waitMask);
}
template <>
Getcpu::Func AccessSpreader<test::DeterministicAtomic>::pickGetcpuFunc();
}
} // namespace folly::detail