Commit ec06f66c authored by Elizabeth Smith, committed by Sara Golemon

abstract thread_local support

Summary:
Change from using __thread to using the FOLLY_TLS macro. This allows abstraction over the gcc and msvc implementations of thread-local storage (__thread and __declspec(thread)), which have the same semantics, and will also allow drop-in replacement with thread_local once compiler support for that feature is complete. This doesn't do anything about Apple, however, which still has broken __thread support.

This doesn't actually change any implementation for now, simply allows for correct compilation

Test Plan: fbmake runtests

Reviewed By: delong.j@fb.com

FB internal diff: D1278726
parent f585e98a
...@@ -95,6 +95,18 @@ struct MaxAlign { char c; } __attribute__((aligned)); ...@@ -95,6 +95,18 @@ struct MaxAlign { char c; } __attribute__((aligned));
# endif # endif
#endif #endif
/* FOLLY_TLS: compiler-specific thread-local storage specifier.
 *   - MSVC        -> __declspec(thread)
 *   - GCC / Clang -> __thread
 * Both spellings have the same semantics. Keep in mind that __thread is
 * broken on Apple platforms. Compilation stops with an error when none of
 * the recognized compilers is detected.
 */
#ifdef _MSC_VER
# define FOLLY_TLS __declspec(thread)
#elif defined(__clang__) || defined(__GNUC__)
# define FOLLY_TLS __thread
#else
# error cannot define platform specific thread local storage
#endif
// Define to 1 if you have the `preadv' and `pwritev' functions, respectively // Define to 1 if you have the `preadv' and `pwritev' functions, respectively
#if !defined(FOLLY_HAVE_PREADV) && !defined(FOLLY_HAVE_PWRITEV) #if !defined(FOLLY_HAVE_PREADV) && !defined(FOLLY_HAVE_PWRITEV)
......
...@@ -128,7 +128,8 @@ class ThreadLocal { ...@@ -128,7 +128,8 @@ class ThreadLocal {
* NOTE: Apple platforms don't support the same semantics for __thread that * NOTE: Apple platforms don't support the same semantics for __thread that
* Linux does (and it's only supported at all on i386). For these, use * Linux does (and it's only supported at all on i386). For these, use
* pthread_setspecific()/pthread_getspecific() for the per-thread * pthread_setspecific()/pthread_getspecific() for the per-thread
* storage. * storage. Windows (MSVC and GCC) does support the same semantics
* with __declspec(thread)
*/ */
template<class T, class Tag=void> template<class T, class Tag=void>
......
...@@ -230,7 +230,7 @@ template<> ...@@ -230,7 +230,7 @@ template<>
std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0); std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0);
template<> template<>
__thread size_t SequentialThreadId<std::atomic>::currentId(0); FOLLY_TLS size_t SequentialThreadId<std::atomic>::currentId(0);
/////////////// AccessSpreader /////////////// AccessSpreader
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "folly/Likely.h" #include "folly/Likely.h"
#include "folly/Portability.h"
namespace folly { namespace detail { namespace folly { namespace detail {
...@@ -172,8 +173,7 @@ struct SequentialThreadId { ...@@ -172,8 +173,7 @@ struct SequentialThreadId {
private: private:
static Atom<size_t> prevId; static Atom<size_t> prevId;
// TODO: switch to thread_local static FOLLY_TLS size_t currentId;
static __thread size_t currentId;
}; };
template <template<typename> class Atom, size_t kMaxCpus> template <template<typename> class Atom, size_t kMaxCpus>
......
...@@ -90,8 +90,8 @@ void MemoryIdler::flushLocalMallocCaches() { ...@@ -90,8 +90,8 @@ void MemoryIdler::flushLocalMallocCaches() {
#ifdef __x86_64__ #ifdef __x86_64__
static const size_t s_pageSize = sysconf(_SC_PAGESIZE); static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
static __thread uintptr_t tls_stackLimit; static FOLLY_TLS uintptr_t tls_stackLimit;
static __thread size_t tls_stackSize; static FOLLY_TLS size_t tls_stackSize;
static void fetchStackLimits() { static void fetchStackLimits() {
pthread_attr_t attr; pthread_attr_t attr;
......
...@@ -169,7 +169,7 @@ struct StaticMeta { ...@@ -169,7 +169,7 @@ struct StaticMeta {
} }
#if !__APPLE__ #if !__APPLE__
static __thread ThreadEntry threadEntry_; static FOLLY_TLS ThreadEntry threadEntry_;
#endif #endif
static StaticMeta<Tag>* inst_; static StaticMeta<Tag>* inst_;
...@@ -412,7 +412,8 @@ struct StaticMeta { ...@@ -412,7 +412,8 @@ struct StaticMeta {
}; };
#if !__APPLE__ #if !__APPLE__
template <class Tag> __thread ThreadEntry StaticMeta<Tag>::threadEntry_ = {0}; template <class Tag>
FOLLY_TLS ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
#endif #endif
template <class Tag> StaticMeta<Tag>* StaticMeta<Tag>::inst_ = nullptr; template <class Tag> StaticMeta<Tag>* StaticMeta<Tag>::inst_ = nullptr;
......
...@@ -42,9 +42,9 @@ using namespace folly::exception_tracer; ...@@ -42,9 +42,9 @@ using namespace folly::exception_tracer;
namespace { namespace {
__thread bool invalid; FOLLY_TLS bool invalid;
__thread StackTraceStack activeExceptions; FOLLY_TLS StackTraceStack activeExceptions;
__thread StackTraceStack caughtExceptions; FOLLY_TLS StackTraceStack caughtExceptions;
pthread_once_t initialized = PTHREAD_ONCE_INIT; pthread_once_t initialized = PTHREAD_ONCE_INIT;
extern "C" { extern "C" {
......
...@@ -327,7 +327,7 @@ TEST(SequentialThreadId, Simple) { ...@@ -327,7 +327,7 @@ TEST(SequentialThreadId, Simple) {
EXPECT_EQ(cpu, again); EXPECT_EQ(cpu, again);
} }
static __thread unsigned testingCpu = 0; static FOLLY_TLS unsigned testingCpu = 0;
static int testingGetcpu(unsigned* cpu, unsigned* node, void* unused) { static int testingGetcpu(unsigned* cpu, unsigned* node, void* unused) {
if (cpu != nullptr) { if (cpu != nullptr) {
......
...@@ -25,8 +25,8 @@ ...@@ -25,8 +25,8 @@
namespace folly { namespace test { namespace folly { namespace test {
__thread sem_t* DeterministicSchedule::tls_sem; FOLLY_TLS sem_t* DeterministicSchedule::tls_sem;
__thread DeterministicSchedule* DeterministicSchedule::tls_sched; FOLLY_TLS DeterministicSchedule* DeterministicSchedule::tls_sched;
// access is protected by futexLock // access is protected by futexLock
static std::unordered_map<detail::Futex<DeterministicAtomic>*, static std::unordered_map<detail::Futex<DeterministicAtomic>*,
...@@ -335,7 +335,8 @@ test::DeterministicAtomic<size_t> ...@@ -335,7 +335,8 @@ test::DeterministicAtomic<size_t>
SequentialThreadId<test::DeterministicAtomic>::prevId(0); SequentialThreadId<test::DeterministicAtomic>::prevId(0);
template<> template<>
__thread size_t SequentialThreadId<test::DeterministicAtomic>::currentId(0); FOLLY_TLS size_t
SequentialThreadId<test::DeterministicAtomic>::currentId(0);
template<> template<>
const AccessSpreader<test::DeterministicAtomic> const AccessSpreader<test::DeterministicAtomic>
......
...@@ -129,8 +129,8 @@ class DeterministicSchedule : boost::noncopyable { ...@@ -129,8 +129,8 @@ class DeterministicSchedule : boost::noncopyable {
static int getRandNumber(int n); static int getRandNumber(int n);
private: private:
static __thread sem_t* tls_sem; static FOLLY_TLS sem_t* tls_sem;
static __thread DeterministicSchedule* tls_sched; static FOLLY_TLS DeterministicSchedule* tls_sched;
std::function<int(int)> scheduler_; std::function<int(int)> scheduler_;
std::vector<sem_t*> sems_; std::vector<sem_t*> sems_;
......
...@@ -418,8 +418,8 @@ enum LifecycleEvent { ...@@ -418,8 +418,8 @@ enum LifecycleEvent {
MAX_LIFECYCLE_EVENT MAX_LIFECYCLE_EVENT
}; };
static __thread int lc_counts[MAX_LIFECYCLE_EVENT]; static FOLLY_TLS int lc_counts[MAX_LIFECYCLE_EVENT];
static __thread int lc_prev[MAX_LIFECYCLE_EVENT]; static FOLLY_TLS int lc_prev[MAX_LIFECYCLE_EVENT];
static int lc_outstanding() { static int lc_outstanding() {
return lc_counts[DEFAULT_CONSTRUCTOR] + lc_counts[COPY_CONSTRUCTOR] + return lc_counts[DEFAULT_CONSTRUCTOR] + lc_counts[COPY_CONSTRUCTOR] +
......
...@@ -152,8 +152,8 @@ ThreadLocal<int64_t> globalTL64Baseline; ...@@ -152,8 +152,8 @@ ThreadLocal<int64_t> globalTL64Baseline;
ThreadLocal<int32_t> globalTL32Baseline; ThreadLocal<int32_t> globalTL32Baseline;
std::atomic<int64_t> globalInt64Baseline(0); std::atomic<int64_t> globalInt64Baseline(0);
std::atomic<int32_t> globalInt32Baseline(0); std::atomic<int32_t> globalInt32Baseline(0);
__thread int64_t global__thread64; FOLLY_TLS int64_t global__thread64;
__thread int32_t global__thread32; FOLLY_TLS int32_t global__thread32;
// Alternate lock-free implementation. Achieves about the same performance, // Alternate lock-free implementation. Achieves about the same performance,
// but uses about 20x more memory than ThreadCachedInt with 24 threads. // but uses about 20x more memory than ThreadCachedInt with 24 threads.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment