Commit 88903796 authored by Kenny Yu's avatar Kenny Yu Committed by Facebook Github Bot

Fix data race in folly/executors/Codel.cpp exposed by TSAN

Summary:
I found a data race with TSAN while attempting to run a sanitizer version of my service:

```
WARNING: ThreadSanitizer: data race (pid=266)
  Read of size 8 at 0x7b58000c0c08 by thread T370:
    @ folly::Codel::overloaded(std::chrono::duration<long, std::ratio<1l, 1000000000l> >) at ./folly/executors/Codel.cpp:44
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::Worker<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Previous write of size 8 at 0x7b58000c0c08 by thread T371:
    @ folly::Codel::overloaded(std::chrono::duration<long, std::ratio<1l, 1000000000l> >) at ./folly/executors/Codel.cpp:62
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::Worker<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Location is heap block of size 744 at 0x7b58000c0c00 allocated by thread T314:
    @ operator new(unsigned long) at ??:?
    @ PriorityImplT at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:826
    @ void __gnu_cxx::new_allocator<apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > > >::construct<apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >, std::array<std::pair<std::shared_ptr<apache::thrift::concurrency::ThreadFactory>, unsigned long>, 5ul>&, bool&, unsigned long&>(apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >*, std::array<std::pair<std::shared_ptr<apache::thrift::concurrency::ThreadFactory>, unsigned long>, 5ul>&, bool&, unsigned long&)
    @ std::shared_ptr<apache::thrift::concurrency::PriorityThreadManager> apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >(std::array<unsigned long, 5ul> const&, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1090
    @ std::shared_ptr<apache::thrift::concurrency::PriorityThreadManager> apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >(unsigned long, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1100
    @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
    @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Thread T370 (tid=638, running) created by thread T314 at:
    @ pthread_create at ??:?
    @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185
    @ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840
    @ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347
    @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
    @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Thread T371 (tid=639, running) created by thread T314 at:
    @ pthread_create at ??:?
    @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185
    @ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840
    @ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347
    @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
    @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Thread T314 (tid=582, running) created by main thread at:
    @ pthread_create at ??:?
    @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
    ...
```

Looks like there is a data race in how `codelMinDelay_` is used. I couldn't get `std::atomic` to compile with `std::chrono::nanoseconds`,
so I used `std::atomic<uint64_t>` and converted between `uint64_t` and time types appropriately.

Reviewed By: yfeldblum

Differential Revision: D5759588

fbshipit-source-id: 8213f3789808265ddfe5ab122f0f86490d0ea6ea
parent af4a3352
...@@ -22,30 +22,33 @@ ...@@ -22,30 +22,33 @@
DEFINE_int32(codel_interval, 100, "Codel default interval time in ms"); DEFINE_int32(codel_interval, 100, "Codel default interval time in ms");
DEFINE_int32(codel_target_delay, 5, "Target codel queueing delay in ms"); DEFINE_int32(codel_target_delay, 5, "Target codel queueing delay in ms");
using std::chrono::nanoseconds; using namespace std::chrono;
using std::chrono::milliseconds;
namespace folly { namespace folly {
Codel::Codel() Codel::Codel()
: codelMinDelay_(0), : codelMinDelayNs_(0),
codelIntervalTime_(std::chrono::steady_clock::now()), codelIntervalTimeNs_(
duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
.count()),
codelResetDelay_(true), codelResetDelay_(true),
overloaded_(false) {} overloaded_(false) {}
bool Codel::overloaded(std::chrono::nanoseconds delay) { bool Codel::overloaded(nanoseconds delay) {
bool ret = false; bool ret = false;
auto now = std::chrono::steady_clock::now(); auto now = steady_clock::now();
// Avoid another thread updating the value at the same time we are using it // Avoid another thread updating the value at the same time we are using it
// to calculate the overloaded state // to calculate the overloaded state
auto minDelay = codelMinDelay_; auto minDelay = nanoseconds(codelMinDelayNs_);
if (now > codelIntervalTime_ && if (now > steady_clock::time_point(nanoseconds(codelIntervalTimeNs_)) &&
// testing before exchanging is more cacheline-friendly // testing before exchanging is more cacheline-friendly
(!codelResetDelay_.load(std::memory_order_acquire) && (!codelResetDelay_.load(std::memory_order_acquire) &&
!codelResetDelay_.exchange(true))) { !codelResetDelay_.exchange(true))) {
codelIntervalTime_ = now + getInterval(); codelIntervalTimeNs_ =
duration_cast<nanoseconds>((now + getInterval()).time_since_epoch())
.count();
if (minDelay > getTargetDelay()) { if (minDelay > getTargetDelay()) {
overloaded_ = true; overloaded_ = true;
...@@ -57,12 +60,12 @@ bool Codel::overloaded(std::chrono::nanoseconds delay) { ...@@ -57,12 +60,12 @@ bool Codel::overloaded(std::chrono::nanoseconds delay) {
// and that it happens after the interval reset above // and that it happens after the interval reset above
if (codelResetDelay_.load(std::memory_order_acquire) && if (codelResetDelay_.load(std::memory_order_acquire) &&
codelResetDelay_.exchange(false)) { codelResetDelay_.exchange(false)) {
codelMinDelay_ = delay; codelMinDelayNs_ = delay.count();
// More than one request must come in during an interval before codel // More than one request must come in during an interval before codel
// starts dropping requests // starts dropping requests
return false; return false;
} else if (delay < codelMinDelay_) { } else if (delay < nanoseconds(codelMinDelayNs_)) {
codelMinDelay_ = delay; codelMinDelayNs_ = delay.count();
} }
// Here is where we apply different logic than codel proper. Instead of // Here is where we apply different logic than codel proper. Instead of
...@@ -84,7 +87,7 @@ int Codel::getLoad() { ...@@ -84,7 +87,7 @@ int Codel::getLoad() {
} }
nanoseconds Codel::getMinDelay() { nanoseconds Codel::getMinDelay() {
return codelMinDelay_; return nanoseconds(codelMinDelayNs_);
} }
milliseconds Codel::getInterval() { milliseconds Codel::getInterval() {
......
...@@ -80,8 +80,8 @@ class Codel { ...@@ -80,8 +80,8 @@ class Codel {
std::chrono::milliseconds getSloughTimeout(); std::chrono::milliseconds getSloughTimeout();
private: private:
std::chrono::nanoseconds codelMinDelay_; std::atomic<uint64_t> codelMinDelayNs_;
std::chrono::time_point<std::chrono::steady_clock> codelIntervalTime_; std::atomic<uint64_t> codelIntervalTimeNs_;
// flag to make overloaded() thread-safe, since we only want // flag to make overloaded() thread-safe, since we only want
// to reset the delay once per time period // to reset the delay once per time period
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment