Commit ff7ab9db authored by Aaryaman Sagar's avatar Aaryaman Sagar Committed by Facebook GitHub Bot

Fix ParkingLot memory ordering bug

Summary:
```
auto x = std::atomic<std::uint64_t>{0};
auto y = std::atomic<std::uint64_t>{0};

// thread 1
x.store(1, std::memory_order_release);
auto one = y.load(std::memory_order_seq_cst);

// thread 2
y.fetch_add(1, std::memory_order_seq_cst);
auto two = x.load(std::memory_order_seq_cst);
```
Here it is possible for both `one` and `two` to end up with the value `0`,
because the release store to `x` does not take part in the single total order
of seq_cst operations. The code in ParkingLot assumed that this would not be
possible, so the load of the counter used to track the number of waiters could
get reordered with respect to the memory accesses around it. This diff adds a
seq_cst fence to ensure that the stores an unparking thread makes before calling
unpark are always sequenced _before_ its load of the counter globally.

Reviewed By: yfeldblum, ot

Differential Revision: D28972810

fbshipit-source-id: 06eb6a2e6df6b00bf07ac8454a79257a5276e154
parent d418b5ee
......@@ -300,6 +300,7 @@ void ParkingLot<Data>::unpark(const Key bits, Func&& func) {
// B: Must be seq_cst. Matches A. If true, A *must* see in seq_cst
// order any atomic updates in toPark() (and matching updates that
// happen before unpark is called)
std::atomic_thread_fence(std::memory_order_seq_cst);
if (bucket.count_.load(std::memory_order_seq_cst) == 0) {
return;
}
......
......@@ -61,6 +61,54 @@ TEST(ParkingLot, multilot) {
large.join();
}
// Stress test: two threads hand a monotonically increasing counter back and
// forth for a fixed wall-clock duration. threadTwo publishes a new value in
// `one` and calls unpark; threadOne parks until `one` changes and then echoes
// the value it read into `two`. A lost wake-up would leave threadOne parked and
// threadTwo spinning, so the joins at the end of the test would never return.
TEST(ParkingLot, StressTestPingPong) {
auto lot = ParkingLot<std::uint32_t>{};
// `one` is written by threadTwo and read by threadOne; `two` is written by
// threadOne and read by threadTwo.
auto one = std::atomic<std::uint64_t>{0};
auto two = std::atomic<std::uint64_t>{0};
// Set by the main thread to ask threadOne to stop.
auto testDone = std::atomic<bool>{false};
// Set by threadOne just before it exits so that threadTwo can stop spinning.
auto threadOneDone = std::atomic<bool>{false};
auto threadOne = std::thread{[&]() {
// Last value of `one` that this thread has observed.
auto local = std::uint64_t{0};
while (!testDone.load(std::memory_order_relaxed)) {
// wait while the atomic is still equal to `local`; the other thread
// unblocks us because it signals (store + unpark) before spinning itself
// NOTE(review): the arguments are presumably (key, data, toPark predicate,
// preWait callback) -- confirm against ParkingLot::park.
lot.park(
&one, -1, [&]() { return one.load() == local; }, []() {});
// Pick up whatever value is currently in `one` and echo it back via `two`.
local = one.load(std::memory_order_acquire);
two.store(local, std::memory_order_release);
}
threadOneDone.store(true, std::memory_order_release);
}};
auto threadTwo = std::thread{[&]() {
// No loop condition: the only exit is the `return` in the spin loop below.
for (auto i = std::uint64_t{1}; true; ++i) {
auto local = two.load(std::memory_order_acquire);
// threadOne only stores into `two` values it read from `one`, and at this
// point this thread has stored at most i - 1 into `one`.
// NOTE(review): if this is the standard <cassert> macro it is compiled out
// when NDEBUG is defined, so the check would not run in such builds.
assert(local < i);
// unblock the other thread
one.store(i, std::memory_order_release);
lot.unpark(&one, [&](auto&&) { return UnparkControl::RemoveBreak; });
// spinning (vs sleeping with ParkingLot::park) happens to expose the bug
// more frequently in practice
while (two.load(std::memory_order_acquire) == local) {
// Stop once threadOne has exited, since `two` will not change any more.
if (threadOneDone.load(std::memory_order_acquire)) {
return;
}
}
}
}};
// Let the two threads ping-pong for ten seconds before requesting shutdown.
/* sleep override */
std::this_thread::sleep_for(std::chrono::seconds{10});
testDone.store(true);
threadOne.join();
threadTwo.join();
}
// This is not possible to implement with Futex, because futex
// and the native linux syscall are 32-bit only.
TEST(ParkingLot, LargeWord) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment