facebook
diff --git a/folly/synchronization/ParkingLot.h b/folly/synchronization/ParkingLot.h
Lines changed: 1 addition & 0 deletions
diff --git a/folly/synchronization/test/ParkingLotTest.cpp b/folly/synchronization/test/ParkingLotTest.cpp
Lines changed: 48 additions & 0 deletions
@@ -300,6 +300,7 @@ void ParkingLot<Data>::unpark(const Key bits, Func&& func) {
   // B: Must be seq_cst.  Matches A.  If true, A *must* see in seq_cst
   // order any atomic updates in toPark() (and matching updates that
   // happen before unpark is called)
+  std::atomic_thread_fence(std::memory_order_seq_cst);
   if (bucket.count_.load(std::memory_order_seq_cst) == 0) {
     return;
   }
 
@@ -61,6 +61,54 @@ TEST(ParkingLot, multilot) {
   large.join();
 }
 
+TEST(ParkingLot, StressTestPingPong) { // ping-pong a counter via park/unpark
+  auto lot = ParkingLot<std::uint32_t>{};
+  auto one = std::atomic<std::uint64_t>{0}; // written by threadTwo; park key
+  auto two = std::atomic<std::uint64_t>{0}; // threadOne echoes `one` here
+
+  auto testDone = std::atomic<bool>{false}; // set by the main thread
+  auto threadOneDone = std::atomic<bool>{false}; // lets threadTwo exit
+
+  auto threadOne = std::thread{[&]() {
+    auto local = std::uint64_t{0}; // last value of `one` this thread has seen
+    while (!testDone.load(std::memory_order_relaxed)) {
+      // wait while `one` is still equal to `local`; the other thread unblocks
+      // us because it stores a new value and unparks before spinning itself
+      lot.park(
+          &one, -1, [&]() { return one.load() == local; }, []() {});
+      local = one.load(std::memory_order_acquire);
+      two.store(local, std::memory_order_release); // echo back to threadTwo
+    }
+
+    threadOneDone.store(true, std::memory_order_release);
+  }};
+
+  auto threadTwo = std::thread{[&]() {
+    for (auto i = std::uint64_t{1}; true; ++i) {
+      auto local = two.load(std::memory_order_acquire);
+      assert(local < i); // the echo must lag the value about to be published
+
+      // publish the next value, then unblock the other thread
+      one.store(i, std::memory_order_release);
+      lot.unpark(&one, [&](auto&&) { return UnparkControl::RemoveBreak; });
+
+      // spin until echoed; spinning (vs sleeping with ParkingLot::park)
+      // happens to expose the bug more frequently in practice
+      while (two.load(std::memory_order_acquire) == local) {
+        if (threadOneDone.load(std::memory_order_acquire)) {
+          return; // threadOne has exited, so no further echo will arrive
+        }
+      }
+    }
+  }};
+
+  /* sleep override */
+  std::this_thread::sleep_for(std::chrono::seconds{10});
+  testDone.store(true); // ask threadOne to leave its loop
+  threadOne.join();
+  threadTwo.join();
+}
+
 // This is not possible to implement with Futex, because futex
 // and the native linux syscall are 32-bit only.
 TEST(ParkingLot, LargeWord) {
Original file line number	Diff line number	Diff line change
`@@ -300,6 +300,7 @@ void ParkingLot<Data>::unpark(const Key bits, Func&& func) {`
`300`	`300`	`// B: Must be seq_cst. Matches A. If true, A must see in seq_cst`
`301`	`301`	`// order any atomic updates in toPark() (and matching updates that`
`302`	`302`	`// happen before unpark is called)`
	`303`	`+ std::atomic_thread_fence(std::memory_order_seq_cst);`
`303`	`304`	`if (bucket.count_.load(std::memory_order_seq_cst) == 0) {`
`304`	`305`	`return;`
`305`	`306`	`}`