Commit 07802d90 authored by Philipp Unterbrunner's avatar Philipp Unterbrunner Committed by Facebook Github Bot 1

Generalized and polished folly::TokenBucket

Summary: Added support for user-defined clock classes, improved comments, and removed part of the std::atomics use that had no effect on thread-safety.

Reviewed By: yfeldblum

Differential Revision: D3708378

fbshipit-source-id: 1a933c3707c12311584a3b33afd773ee91577167
parent dcf0273a
...@@ -18,155 +18,371 @@ ...@@ -18,155 +18,371 @@
#include <algorithm> #include <algorithm>
#include <atomic> #include <atomic>
#include <limits>
#include <chrono> #include <chrono>
#include <folly/Likely.h> #include <folly/Likely.h>
#include <folly/detail/CacheLocality.h>
namespace folly { namespace folly {
/** Threadsafe TokenBucket implementation, based on the idea of /**
* converting tokens into time and maintaining state as a timestamp relative to * Default clock class used by ParameterizedDynamicTokenBucket and derived
* now. The number of tokens available is represented by the delta between now * classes. User-defined clock classes must be steady (monotonic) and define a
* and the timestamp, and the 'burst' is represented by the maximum delta. * static function std::chrono::duration<> timeSinceEpoch().
*/ */
class TokenBucket { struct DefaultTokenBucketClock {
private: static auto timeSinceEpoch() noexcept
std::atomic<double> time_; -> decltype(std::chrono::steady_clock::now().time_since_epoch()) {
std::atomic<double> secondsPerToken_; return std::chrono::steady_clock::now().time_since_epoch();
std::atomic<double> secondsPerBurst_;
public:
TokenBucket(double rate, double burst, double nowInSeconds) noexcept
: time_(nowInSeconds) {
reset(rate, burst, nowInSeconds);
} }
};
void reset(double rate, double burst, double nowInSeconds) noexcept { /**
double tokens = available(nowInSeconds); * Thread-safe (atomic) token bucket implementation.
*
secondsPerToken_.store( * A token bucket (http://en.wikipedia.org/wiki/Token_bucket) models a stream
1.0 / rate - std::numeric_limits<double>::epsilon(), * of events with an average rate and some amount of burstiness. The canonical
std::memory_order_relaxed); * example is a packet switched network: the network can accept some number of
* bytes per second and the bytes come in finite packets (bursts). A token
* bucket stores up to a fixed number of tokens (the burst size). Some number
* of tokens are removed when an event occurs. The tokens are replenished at a
* fixed rate.
*
* This implementation records the last time it was updated. This allows the
* token bucket to add tokens "just in time" when tokens are requested.
*
* The "dynamic" base variant allows the token generation rate and maximum
* burst size to change with every token consumption.
*
* @tparam ClockT Clock type, must be steady i.e. monotonic.
*/
template <typename ClockT = DefaultTokenBucketClock>
class ParameterizedDynamicTokenBucket {
public:
/**
* Constructor.
*
* @param zeroTime Initial time at which to consider the token bucket
* starting to fill. Defaults to 0, so by default token
* buckets are "full" after construction.
*/
explicit ParameterizedDynamicTokenBucket(double zeroTime = 0) noexcept
: zeroTime_(zeroTime) {}
secondsPerBurst_.store( /**
burst / rate + std::numeric_limits<double>::epsilon(), * Copy constructor.
std::memory_order_relaxed); *
* Thread-safe. (Copy constructors of derived classes may not be thread-safe
* however.)
*/
ParameterizedDynamicTokenBucket(
const ParameterizedDynamicTokenBucket& other) noexcept
: zeroTime_(other.zeroTime_.load()) {}
set_capacity(tokens, nowInSeconds); /**
* Copy-assignment operator.
*
* Warning: not thread safe for the object being assigned to (including
* self-assignment). Thread-safe for the other object.
*/
ParameterizedDynamicTokenBucket& operator=(
const ParameterizedDynamicTokenBucket& other) noexcept {
zeroTime_ = other.zeroTime_.load();
return *this;
} }
void set_capacity(double tokens, double nowInSeconds) noexcept { /**
const double secondsPerToken = std::atomic_load_explicit( * Re-initialize token bucket.
&secondsPerToken_, std::memory_order_relaxed); *
* Thread-safe.
const double secondsPerBurst = std::atomic_load_explicit( *
&secondsPerBurst_, std::memory_order_relaxed); * @param zeroTime Initial time at which to consider the token bucket
* starting to fill. Defaults to 0, so by default token
double newTime = nowInSeconds - std::min( * bucket is reset to "full".
tokens * secondsPerToken, secondsPerBurst); */
void reset(double zeroTime = 0) noexcept {
time_.store(newTime, std::memory_order_relaxed); zeroTime_ = zeroTime;
} }
// If there are `tokens` avilable at `nowInSeconds`, consume them and /**
// return true. Otherwise, return false. * Attempts to consume some number of tokens. Tokens are first added to the
// * bucket based on the time elapsed since the last attempt to consume tokens.
// This implementation is written in a lock-free manner using a * Note: Attempts to consume more tokens than the burst size will always
// compare-and-exchange loop, with branch prediction optimized to minimize * fail.
// time spent in the 'success' case which performs a write. *
bool consume(double tokens, double nowInSeconds) noexcept { * Thread-safe.
const double secondsNeeded = tokens * std::atomic_load_explicit( *
&secondsPerToken_, std::memory_order_relaxed); * @param toConsume The number of tokens to consume.
* @param rate Number of tokens to generate per second.
const double minTime = nowInSeconds - std::atomic_load_explicit( * @param burstSize Maximum burst size. Must be greater than 0.
&secondsPerBurst_, std::memory_order_relaxed); * @param nowInSeconds Current time in seconds. Should be monotonically
* increasing from the nowInSeconds specified in
* this token bucket's constructor.
* @return True if the rate limit check passed, false otherwise.
*/
bool consume(
double toConsume,
double rate,
double burstSize,
double nowInSeconds = defaultClockNow()) {
assert(rate > 0);
assert(burstSize > 0);
return this->consumeImpl(
rate, burstSize, nowInSeconds, [toConsume](double& tokens) {
if (tokens < toConsume) {
return false;
}
tokens -= toConsume;
return true;
});
}
double oldTime = /**
std::atomic_load_explicit(&time_, std::memory_order_relaxed); * Similar to consume, but always consumes some number of tokens. If the
double newTime = oldTime; * bucket contains enough tokens - consumes toConsume tokens. Otherwise the
* bucket is drained.
*
* Thread-safe.
*
* @param toConsume The number of tokens to consume.
* @param rate Number of tokens to generate per second.
* @param burstSize Maximum burst size. Must be greater than 0.
* @param nowInSeconds Current time in seconds. Should be monotonically
* increasing from the nowInSeconds specified in
* this token bucket's constructor.
* @return number of tokens that were consumed.
*/
double consumeOrDrain(
double toConsume,
double rate,
double burstSize,
double nowInSeconds = defaultClockNow()) {
assert(rate > 0);
assert(burstSize > 0);
// Limit the number of available tokens to 'burst'. We don't need to do double consumed;
// this inside the loop because if we iterate more than once another this->consumeImpl(
// caller will have performed an update that also covered this rate, burstSize, nowInSeconds, [&consumed, toConsume](double& tokens) {
// calculation. Also, tell the compiler to optimize branch prediction to if (tokens < toConsume) {
// minimize time spent between reads and writes in the success case consumed = tokens;
if (UNLIKELY(minTime > oldTime)) { tokens = 0.0;
newTime = minTime; } else {
consumed = toConsume;
tokens -= toConsume;
}
return true;
});
return consumed;
} }
while (true) { /**
newTime += secondsNeeded; * Returns the number of tokens currently available.
*
// Optimize for the write-contention case, to minimize the impact of * Thread-safe (but returned value may immediately be outdated).
// branch misprediction on other threads */
if (UNLIKELY(newTime > nowInSeconds)) { double available(
return false; double rate,
double burstSize,
double nowInSeconds = defaultClockNow()) const noexcept {
assert(rate > 0);
assert(burstSize > 0);
return std::min((nowInSeconds - this->zeroTime_) * rate, burstSize);
} }
// Optimize for the write-contention case, to minimize the impact of /**
// branch misprediction on other threads * Returns the current time in seconds since Epoch.
if (LIKELY(std::atomic_compare_exchange_weak_explicit( */
&time_, &oldTime, newTime, static double defaultClockNow() noexcept(noexcept(ClockT::timeSinceEpoch())) {
std::memory_order_relaxed, std::memory_order_relaxed))) { return std::chrono::duration_cast<std::chrono::duration<double>>(
return true; ClockT::timeSinceEpoch())
.count();
} }
newTime = oldTime; private:
template <typename TCallback>
bool consumeImpl(
double rate,
double burstSize,
double nowInSeconds,
const TCallback& callback) {
auto zeroTimeOld = zeroTime_.load();
double zeroTimeNew;
do {
auto tokens = std::min((nowInSeconds - zeroTimeOld) * rate, burstSize);
if (!callback(tokens)) {
return false;
} }
zeroTimeNew = nowInSeconds - tokens / rate;
} while (
UNLIKELY(!zeroTime_.compare_exchange_weak(zeroTimeOld, zeroTimeNew)));
return true; return true;
} }
// Similar to consume, but will always consume some number of tokens. std::atomic<double> zeroTime_ FOLLY_ALIGN_TO_AVOID_FALSE_SHARING;
double consumeOrDrain(double tokens, double nowInSeconds) noexcept { };
const double secondsPerToken = std::atomic_load_explicit(
&secondsPerToken_, std::memory_order_relaxed);
const double secondsNeeded = tokens * secondsPerToken; /**
const double minTime = nowInSeconds - std::atomic_load_explicit( * Specialization of ParameterizedDynamicTokenBucket with a fixed token
&secondsPerBurst_, std::memory_order_relaxed); * generation rate and a fixed maximum burst size.
*/
template <typename ClockT = DefaultTokenBucketClock>
class ParameterizedTokenBucket {
private:
using Impl = ParameterizedDynamicTokenBucket<ClockT>;
public:
/**
* Construct a token bucket with a specific maximum rate and burst size.
*
* @param genRate Number of tokens to generate per second.
* @param burstSize Maximum burst size. Must be greater than 0.
* @param zeroTime Initial time at which to consider the token bucket
* starting to fill. Defaults to 0, so by default token
* bucket is "full" after construction.
*/
ParameterizedTokenBucket(
double genRate,
double burstSize,
double zeroTime = 0) noexcept
: tokenBucket_(zeroTime), rate_(genRate), burstSize_(burstSize) {
assert(rate_ > 0);
assert(burstSize_ > 0);
}
double oldTime = /**
std::atomic_load_explicit(&time_, std::memory_order_relaxed); * Copy constructor.
double newTime = oldTime; *
* Warning: not thread safe!
*/
ParameterizedTokenBucket(const ParameterizedTokenBucket& other) noexcept =
default;
/**
* Copy-assignment operator.
*
* Warning: not thread safe!
*/
ParameterizedTokenBucket& operator=(
const ParameterizedTokenBucket& other) noexcept = default;
// Limit the number of available tokens to 'burst'. /**
// Also, tell the compiler to optimize branch prediction to * Change rate and burst size.
// minimize time spent between reads and writes in the success case *
if (UNLIKELY(minTime > oldTime)) { * Warning: not thread safe!
newTime = minTime; *
* @param genRate Number of tokens to generate per second.
* @param burstSize Maximum burst size. Must be greater than 0.
* @param nowInSeconds Current time in seconds. Should be monotonically
* increasing from the nowInSeconds specified in
* this token bucket's constructor.
*/
void reset(
double genRate,
double burstSize,
double nowInSeconds = defaultClockNow()) noexcept {
assert(genRate > 0);
assert(burstSize > 0);
double availTokens = available(nowInSeconds);
rate_ = genRate;
burstSize_ = burstSize;
setCapacity(availTokens, nowInSeconds);
} }
double consumed; /**
* Change number of tokens in bucket.
newTime += secondsNeeded; *
* Warning: not thread safe!
*
* @param tokens Desired number of tokens in bucket after the call.
* @param nowInSeconds Current time in seconds. Should be monotonically
* increasing from the nowInSeconds specified in
* this token bucket's constructor.
*/
void setCapacity(double tokens, double nowInSeconds) noexcept {
tokenBucket_.reset(nowInSeconds - tokens / rate_);
}
consumed = (newTime - nowInSeconds) / secondsPerToken; /**
time_.store(newTime, std::memory_order_relaxed); * Attempts to consume some number of tokens. Tokens are first added to the
* bucket based on the time elapsed since the last attempt to consume tokens.
* Note: Attempts to consume more tokens than the burst size will always
* fail.
*
* Thread-safe.
*
* @param toConsume The number of tokens to consume.
* @param nowInSeconds Current time in seconds. Should be monotonically
* increasing from the nowInSeconds specified in
* this token bucket's constructor.
* @return True if the rate limit check passed, false otherwise.
*/
bool consume(double toConsume, double nowInSeconds = defaultClockNow()) {
return tokenBucket_.consume(toConsume, rate_, burstSize_, nowInSeconds);
}
return consumed; /**
* Similar to consume, but always consumes some number of tokens. If the
* bucket contains enough tokens - consumes toConsume tokens. Otherwise the
* bucket is drained.
*
* Thread-safe.
*
* @param toConsume The number of tokens to consume.
* @param nowInSeconds Current time in seconds. Should be monotonically
* increasing from the nowInSeconds specified in
* this token bucket's constructor.
* @return number of tokens that were consumed.
*/
double consumeOrDrain(
double toConsume,
double nowInSeconds = defaultClockNow()) {
return tokenBucket_.consumeOrDrain(
toConsume, rate_, burstSize_, nowInSeconds);
} }
double available(double nowInSeconds = defaultClockNow()) const noexcept { /**
double time = * Returns the number of tokens currently available.
std::atomic_load_explicit(&time_, std::memory_order_relaxed); *
* Thread-safe (but returned value may immediately be outdated).
*/
double available(double nowInSeconds = defaultClockNow()) const {
return tokenBucket_.available(rate_, burstSize_, nowInSeconds);
}
double deltaTime = std::min( /**
std::atomic_load_explicit(&secondsPerBurst_, * Returns the number of tokens generated per second.
std::memory_order_relaxed), *
nowInSeconds - time); * Thread-safe (but returned value may immediately be outdated).
*/
double rate() const noexcept {
return rate_;
}
return std::max(0.0, deltaTime / std::atomic_load_explicit( /**
&secondsPerToken_, std::memory_order_relaxed)); * Returns the maximum burst size.
*
* Thread-safe (but returned value may immediately be outdated).
*/
double burst() const noexcept {
return burstSize_;
} }
static double defaultClockNow() { /**
return std::chrono::duration_cast<std::chrono::microseconds>( * Returns the current time in seconds since Epoch.
std::chrono::steady_clock::now().time_since_epoch() */
).count() / 1000000.0; static double defaultClockNow() noexcept(noexcept(Impl::defaultClockNow())) {
return Impl::defaultClockNow();
} }
private:
Impl tokenBucket_;
double rate_;
double burstSize_;
}; };
using TokenBucket = ParameterizedTokenBucket<>;
using DynamicTokenBucket = ParameterizedDynamicTokenBucket<>;
} }
...@@ -72,3 +72,64 @@ static std::vector<std::pair<double, double> > rateToConsumeSize = { ...@@ -72,3 +72,64 @@ static std::vector<std::pair<double, double> > rateToConsumeSize = {
INSTANTIATE_TEST_CASE_P(TokenBucket, INSTANTIATE_TEST_CASE_P(TokenBucket,
TokenBucketTest, TokenBucketTest,
::testing::ValuesIn(rateToConsumeSize)); ::testing::ValuesIn(rateToConsumeSize));
// Drives a TokenBucket at a fixed maximum rate over 10 seconds of simulated
// time and verifies the total number of tokens granted stays within the
// expected lower/upper bounds.
//
// @param maxQps      Token generation rate (tokens per second).
// @param consumeSize Number of tokens requested per consume() call.
void doTokenBucketTest(double maxQps, double consumeSize) {
  // Burst window: 10 milliseconds' worth of tokens at the maximum rate, but
  // never smaller than a single consume request.
  const double tenMillisecondBurst = maxQps * 0.010;
  const double burstSize = std::max(consumeSize, tenMillisecondBurst);
  TokenBucket tokenBucket(maxQps, burstSize, 0);
  double consumedTotal = 0;
  // Advance simulated time in 1 ms steps for 10 seconds.
  for (double now = 0; now <= 10.0; now += 0.001) {
    // A request larger than the burst size must always be rejected.
    EXPECT_FALSE(tokenBucket.consume(burstSize + 1, now));
    // Drain everything currently available, consumeSize tokens at a time.
    while (tokenBucket.consume(consumeSize, now)) {
      consumedTotal += consumeSize;
    }
    // Tokens consumed should exceed some lower bound based on maxQps.
    // Note: The token bucket implementation is not precise, so the lower bound
    // is somewhat fudged. The upper bound is accurate however.
    EXPECT_LE(maxQps * now * 0.9 - 1, consumedTotal);
    // Tokens consumed should not exceed some upper bound based on maxQps.
    EXPECT_GE(maxQps * now + 1e-6, consumedTotal);
  }
}
// Basic sanity check: single-token consumption across a range of rates, plus
// one multi-token-per-call case.
TEST(TokenBucket, sanity) {
  doTokenBucketTest(100, 1);
  doTokenBucketTest(1000, 1);
  doTokenBucketTest(10000, 1);
  // Consume more than one at a time.
  doTokenBucketTest(10000, 5);
}
// Verifies that presenting a timestamp earlier than one already seen does not
// grant or destroy tokens.
TEST(TokenBucket, ReverseTime2) {
  const double rate = 1000;
  // Burst size is 1% of a second's worth of tokens, nudged up by epsilon so
  // that exactly 10 whole tokens fit at t = 0.1.
  TokenBucket tokenBucket(rate, rate * 0.01 + 1e-6);
  size_t count = 0;
  while (tokenBucket.consume(1, 0.1)) {
    count += 1;
  }
  EXPECT_EQ(10, count);
  // Going backwards in time has no effect on the token count (this protects
  // against different threads providing out of order timestamps).
  double tokensBefore = tokenBucket.available();
  EXPECT_FALSE(tokenBucket.consume(1, 0.09999999));
  EXPECT_EQ(tokensBefore, tokenBucket.available());
}
// Verifies consumeOrDrain semantics: a plain consume() that asks for more
// tokens than are available fails and leaves the bucket untouched, while
// consumeOrDrain() takes whatever remains.
TEST(TokenBucket, drainOnFail) {
  DynamicTokenBucket tokenBucket;

  // Almost empty the bucket (rate 10, burst 10, at t = 1: consume 9 of 10).
  EXPECT_TRUE(tokenBucket.consume(9, 10, 10, 1));

  // Request more tokens than available: must fail and leave the single
  // remaining token in place.
  EXPECT_FALSE(tokenBucket.consume(5, 10, 10, 1));
  EXPECT_DOUBLE_EQ(1.0, tokenBucket.available(10, 10, 1));

  // Again request more tokens than available, but ask to drain: the last
  // token is consumed, and a subsequent drain yields nothing.
  EXPECT_DOUBLE_EQ(1.0, tokenBucket.consumeOrDrain(5, 10, 10, 1));
  EXPECT_DOUBLE_EQ(0.0, tokenBucket.consumeOrDrain(1, 10, 10, 1));
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment