Skip to content

Commit ff7ab9d

Browse files
aaryfacebook-github-bot
authored andcommitted
Fix ParkingLot memory ordering bug
Summary: ``` auto x = std::atomic<std::uint64_t>{0}; auto y = std::atomic<std::uint64_t>{0}; // thread 1 x.store(1, std::memory_order_release); auto one = y.load(std::memory_order_seq_cst); // thread 2 y.fetch_add(1, std::memory_order_seq_cst); auto two = x.load(std::memory_order_seq_cst); ``` Here it is possible for both `one` and `two` to end up with the value `0`. The code in ParkingLot assumed that this would not be possible; and the counter used to track the number of waiters could get reordered with respect to loads around it. This diff adds a seq_cst fence to ensure unparking threads always sequence their stores before parking _before_ the counter load globally. Reviewed By: yfeldblum, ot Differential Revision: D28972810 fbshipit-source-id: 06eb6a2e6df6b00bf07ac8454a79257a5276e154
1 parent d418b5e commit ff7ab9d

2 files changed

Lines changed: 49 additions & 0 deletions

File tree

‎folly/synchronization/ParkingLot.h‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ void ParkingLot<Data>::unpark(const Key bits, Func&& func) {
300300
// B: Must be seq_cst. Matches A. If true, A *must* see in seq_cst
301301
// order any atomic updates in toPark() (and matching updates that
302302
// happen before unpark is called)
303+
std::atomic_thread_fence(std::memory_order_seq_cst);
303304
if (bucket.count_.load(std::memory_order_seq_cst) == 0) {
304305
return;
305306
}

‎folly/synchronization/test/ParkingLotTest.cpp‎

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,54 @@ TEST(ParkingLot, multilot) {
6161
large.join();
6262
}
6363

64+
TEST(ParkingLot, StressTestPingPong) {
65+
auto lot = ParkingLot<std::uint32_t>{};
66+
auto one = std::atomic<std::uint64_t>{0};
67+
auto two = std::atomic<std::uint64_t>{0};
68+
69+
auto testDone = std::atomic<bool>{false};
70+
auto threadOneDone = std::atomic<bool>{false};
71+
72+
auto threadOne = std::thread{[&]() {
73+
auto local = std::uint64_t{0};
74+
while (!testDone.load(std::memory_order_relaxed)) {
75+
// wait while the atomic is still equal to c, the other thread unblocks us
76+
// because it signals before spinning itself
77+
lot.park(
78+
&one, -1, [&]() { return one.load() == local; }, []() {});
79+
local = one.load(std::memory_order_acquire);
80+
two.store(local, std::memory_order_release);
81+
}
82+
83+
threadOneDone.store(true, std::memory_order_release);
84+
}};
85+
86+
auto threadTwo = std::thread{[&]() {
87+
for (auto i = std::uint64_t{1}; true; ++i) {
88+
auto local = two.load(std::memory_order_acquire);
89+
assert(local < i);
90+
91+
// unblock the other thread
92+
one.store(i, std::memory_order_release);
93+
lot.unpark(&one, [&](auto&&) { return UnparkControl::RemoveBreak; });
94+
95+
// spinning (vs sleeping with ParkingLot::park) happens to expose the bug
96+
// more frequently in practice
97+
while (two.load(std::memory_order_acquire) == local) {
98+
if (threadOneDone.load(std::memory_order_acquire)) {
99+
return;
100+
}
101+
}
102+
}
103+
}};
104+
105+
/* sleep override */
106+
std::this_thread::sleep_for(std::chrono::seconds{10});
107+
testDone.store(true);
108+
threadOne.join();
109+
threadTwo.join();
110+
}
111+
64112
// This is not possible to implement with Futex, because futex
65113
// and the native linux syscall are 32-bit only.
66114
TEST(ParkingLot, LargeWord) {

0 commit comments

Comments
 (0)