Commit 88903796 authored by Kenny Yu's avatar Kenny Yu Committed by Facebook Github Bot

Fix data race in folly/executors/Codel.cpp exposed by TSAN

Summary:
I found a data race with TSAN while attempting to run a sanitizer version of my service:

```
WARNING: ThreadSanitizer: data race (pid=266)
  Read of size 8 at 0x7b58000c0c08 by thread T370:
    @ folly::Codel::overloaded(std::chrono::duration<long, std::ratio<1l, 1000000000l> >) at ./folly/executors/Codel.cpp:44
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::Worker<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Previous write of size 8 at 0x7b58000c0c08 by thread T371:
    @ folly::Codel::overloaded(std::chrono::duration<long, std::ratio<1l, 1000000000l> >) at ./folly/executors/Codel.cpp:62
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::Worker<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Location is heap block of size 744 at 0x7b58000c0c00 allocated by thread T314:
    @ operator new(unsigned long) at ??:?
    @ PriorityImplT at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:826
    @ void __gnu_cxx::new_allocator<apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > > >::construct<apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >, std::array<std::pair<std::shared_ptr<apache::thrift::concurrency::ThreadFactory>, unsigned long>, 5ul>&, bool&, unsigned long&>(apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >*, std::array<std::pair<std::shared_ptr<apache::thrift::concurrency::ThreadFactory>, unsigned long>, 5ul>&, bool&, unsigned long&)
    @ std::shared_ptr<apache::thrift::concurrency::PriorityThreadManager> apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >(std::array<unsigned long, 5ul> const&, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1090
    @ std::shared_ptr<apache::thrift::concurrency::PriorityThreadManager> apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >(unsigned long, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1100
    @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
    @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Thread T370 (tid=638, running) created by thread T314 at:
    @ pthread_create at ??:?
    @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185
    @ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840
    @ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347
    @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
    @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Thread T371 (tid=639, running) created by thread T314 at:
    @ pthread_create at ??:?
    @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
    @ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185
    @ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840
    @ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347
    @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
    @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
    @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
    @ __tsan_thread_start_func at crtstuff.c:?

  Thread T314 (tid=582, running) created by main thread at:
    @ pthread_create at ??:?
    @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
    ...
```

Looks like there is a data race in how `codelMinDelay_` is used. I couldn't get `std::atomic` to compile with `std::chrono::nanoseconds`,
so I used `std::atomic<uint64_t>` and converted between `uint64_t` and time types appropriately.

Reviewed By: yfeldblum

Differential Revision: D5759588

fbshipit-source-id: 8213f3789808265ddfe5ab122f0f86490d0ea6ea
parent af4a3352
......@@ -22,30 +22,33 @@
DEFINE_int32(codel_interval, 100, "Codel default interval time in ms");
DEFINE_int32(codel_target_delay, 5, "Target codel queueing delay in ms");
using std::chrono::nanoseconds;
using std::chrono::milliseconds;
using namespace std::chrono;
namespace folly {
Codel::Codel()
: codelMinDelay_(0),
codelIntervalTime_(std::chrono::steady_clock::now()),
: codelMinDelayNs_(0),
codelIntervalTimeNs_(
duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
.count()),
codelResetDelay_(true),
overloaded_(false) {}
bool Codel::overloaded(std::chrono::nanoseconds delay) {
bool Codel::overloaded(nanoseconds delay) {
bool ret = false;
auto now = std::chrono::steady_clock::now();
auto now = steady_clock::now();
// Avoid another thread updating the value at the same time we are using it
// to calculate the overloaded state
auto minDelay = codelMinDelay_;
auto minDelay = nanoseconds(codelMinDelayNs_);
if (now > codelIntervalTime_ &&
if (now > steady_clock::time_point(nanoseconds(codelIntervalTimeNs_)) &&
// testing before exchanging is more cacheline-friendly
(!codelResetDelay_.load(std::memory_order_acquire) &&
!codelResetDelay_.exchange(true))) {
codelIntervalTime_ = now + getInterval();
codelIntervalTimeNs_ =
duration_cast<nanoseconds>((now + getInterval()).time_since_epoch())
.count();
if (minDelay > getTargetDelay()) {
overloaded_ = true;
......@@ -57,12 +60,12 @@ bool Codel::overloaded(std::chrono::nanoseconds delay) {
// and that it happens after the interval reset above
if (codelResetDelay_.load(std::memory_order_acquire) &&
codelResetDelay_.exchange(false)) {
codelMinDelay_ = delay;
codelMinDelayNs_ = delay.count();
// More than one request must come in during an interval before codel
// starts dropping requests
return false;
} else if (delay < codelMinDelay_) {
codelMinDelay_ = delay;
} else if (delay < nanoseconds(codelMinDelayNs_)) {
codelMinDelayNs_ = delay.count();
}
// Here is where we apply different logic than codel proper. Instead of
......@@ -84,7 +87,7 @@ int Codel::getLoad() {
}
nanoseconds Codel::getMinDelay() {
return codelMinDelay_;
return nanoseconds(codelMinDelayNs_);
}
milliseconds Codel::getInterval() {
......
......@@ -80,8 +80,8 @@ class Codel {
std::chrono::milliseconds getSloughTimeout();
private:
std::chrono::nanoseconds codelMinDelay_;
std::chrono::time_point<std::chrono::steady_clock> codelIntervalTime_;
std::atomic<uint64_t> codelMinDelayNs_;
std::atomic<uint64_t> codelIntervalTimeNs_;
// flag to make overloaded() thread-safe, since we only want
// to reset the delay once per time period
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment