Commit c593b651 authored by Andrei Bajenov's avatar Andrei Bajenov Committed by Dave Watson

HHWheelTimer - fix scheduling timeouts within callbacks

Summary:
This one is a little tricky, but it explains what happened in t5388156. Basically what we observed in the stack traces is that the HHWheelTimer count_ is 0, but there is still a timeout scheduled on the AsyncTimeout.

How I can see this happening is the following:
- SASL times out
- We fall back to insecure mode in the callback and schedule a send of a real message to the server. Here we schedule another timeout on the same HHWheelTimer object. Since count_ is 0 at this point, we schedule a timeout in AsyncTimeout.
- Now the initial SASL timeout was late, so the HHWheelTimer is in a catchup state. This means that it's possible for the callback on the timeout that was just scheduled to fire on the same "tick".
- This callback invokes SR's callbacks which try to detach the event base from the channel. But it can't detach because we have something scheduled on the AsyncTimeout.
- We crash.

Test Plan:
- will wait for contbuild
- Something like this repros the problem: https://phabricator.fb.com/P17220063. Note I hacked the catchup part to get it to work. Now running loadgen with security = permitted causes the crash.

// hphp tests are stalled, have to do this...

Reviewed By: mshneer@fb.com

Subscribers: bmatheny, mshneer, sandeepkk, trunkagent, njormrod, folly-diffs@, alandau, mhorowitz, srenfro, hitesh, wstefancik

FB internal diff: D1680735

Tasks: 5388156

Signature: t1:1680735:1416252123:e76668860ccda9380a87996b4fa3de957e129404
parent ed0a5787
...@@ -56,9 +56,9 @@ void HHWheelTimer::Callback::setScheduled(HHWheelTimer* wheel, ...@@ -56,9 +56,9 @@ void HHWheelTimer::Callback::setScheduled(HHWheelTimer* wheel,
wheel_ = wheel; wheel_ = wheel;
if (wheel_->count_ == 0) { // Only update the now_ time if we're not in a timeout expired callback
wheel_->now_ = std::chrono::duration_cast<milliseconds>( if (wheel_->count_ == 0 && !wheel_->processingCallbacksGuard_) {
std::chrono::steady_clock::now().time_since_epoch()); wheel_->now_ = getCurTime();
} }
expiration_ = wheel_->now_ + timeout; expiration_ = wheel_->now_ + timeout;
...@@ -83,6 +83,7 @@ HHWheelTimer::HHWheelTimer(folly::EventBase* eventBase, ...@@ -83,6 +83,7 @@ HHWheelTimer::HHWheelTimer(folly::EventBase* eventBase,
, count_(0) , count_(0)
, catchupEveryN_(DEFAULT_CATCHUP_EVERY_N) , catchupEveryN_(DEFAULT_CATCHUP_EVERY_N)
, expirationsSinceCatchup_(0) , expirationsSinceCatchup_(0)
, processingCallbacksGuard_(false)
{ {
} }
...@@ -126,7 +127,7 @@ void HHWheelTimer::scheduleTimeout(Callback* callback, ...@@ -126,7 +127,7 @@ void HHWheelTimer::scheduleTimeout(Callback* callback,
callback->context_ = RequestContext::saveContext(); callback->context_ = RequestContext::saveContext();
if (count_ == 0) { if (count_ == 0 && !processingCallbacksGuard_) {
this->AsyncTimeout::scheduleTimeout(interval_.count()); this->AsyncTimeout::scheduleTimeout(interval_.count());
} }
...@@ -152,6 +153,13 @@ void HHWheelTimer::timeoutExpired() noexcept { ...@@ -152,6 +153,13 @@ void HHWheelTimer::timeoutExpired() noexcept {
// If destroy() is called inside timeoutExpired(), delay actual destruction // If destroy() is called inside timeoutExpired(), delay actual destruction
// until timeoutExpired() returns // until timeoutExpired() returns
DestructorGuard dg(this); DestructorGuard dg(this);
// If scheduleTimeout is called from a callback in this function, it may
// cause inconsistencies in the state of this object. As such, we need
// to treat these calls slightly differently.
processingCallbacksGuard_ = true;
auto reEntryGuard = folly::makeGuard([&] {
processingCallbacksGuard_ = false;
});
// timeoutExpired() can only be invoked directly from the event base loop. // timeoutExpired() can only be invoked directly from the event base loop.
// It should never be invoked recursively. // It should never be invoked recursively.
......
...@@ -95,6 +95,16 @@ class HHWheelTimer : private folly::AsyncTimeout, ...@@ -95,6 +95,16 @@ class HHWheelTimer : private folly::AsyncTimeout,
return wheel_ != nullptr; return wheel_ != nullptr;
} }
protected:
/**
* Don't override this unless you're doing a test. This is mainly here so
* that we can override it to simulate lag in steady_clock.
*/
virtual std::chrono::milliseconds getCurTime() {
return std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now().time_since_epoch());
}
private: private:
// Get the time remaining until this timeout expires // Get the time remaining until this timeout expires
std::chrono::milliseconds getTimeRemaining( std::chrono::milliseconds getTimeRemaining(
...@@ -236,6 +246,7 @@ class HHWheelTimer : private folly::AsyncTimeout, ...@@ -236,6 +246,7 @@ class HHWheelTimer : private folly::AsyncTimeout,
uint32_t catchupEveryN_; uint32_t catchupEveryN_;
uint32_t expirationsSinceCatchup_; uint32_t expirationsSinceCatchup_;
bool processingCallbacksGuard_;
}; };
} // folly } // folly
...@@ -46,6 +46,16 @@ class TestTimeout : public HHWheelTimer::Callback { ...@@ -46,6 +46,16 @@ class TestTimeout : public HHWheelTimer::Callback {
std::function<void()> fn; std::function<void()> fn;
}; };
class TestTimeoutDelayed : public TestTimeout {
protected:
std::chrono::milliseconds getCurTime() override {
return std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now().time_since_epoch()) -
milliseconds(5);
}
};
/* /*
* Test firing some simple timeouts that are fired once and never rescheduled * Test firing some simple timeouts that are fired once and never rescheduled
*/ */
...@@ -86,6 +96,32 @@ TEST(HHWheelTimerTest, FireOnce) { ...@@ -86,6 +96,32 @@ TEST(HHWheelTimerTest, FireOnce) {
T_CHECK_TIMEOUT(start, end, milliseconds(10)); T_CHECK_TIMEOUT(start, end, milliseconds(10));
} }
/*
* Test scheduling a timeout from another timeout callback.
*/
BOOST_AUTO_TEST_CASE(CallbackSchedulingTimeout) {
TEventBase eventBase;
StackWheelTimer t(&eventBase, milliseconds(10));
const HHWheelTimer::Callback* nullCallback = nullptr;
TestTimeout t1;
// Delayed to simulate the steady_clock counter lagging
TestTimeoutDelayed t2;
t.scheduleTimeout(&t1, milliseconds(500));
t1.fn = [&] { t.scheduleTimeout(&t2, milliseconds(1)); };
// If t is in an inconsistent state, detachEventBase should fail.
t2.fn = [&] { t.detachEventBase(); };
BOOST_REQUIRE_EQUAL(t.count(), 1);
eventBase.loop();
BOOST_REQUIRE_EQUAL(t.count(), 0);
BOOST_REQUIRE_EQUAL(t1.timestamps.size(), 1);
BOOST_REQUIRE_EQUAL(t2.timestamps.size(), 1);
}
/* /*
* Test cancelling a timeout when it is scheduled to be fired right away. * Test cancelling a timeout when it is scheduled to be fired right away.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment