Commit 586dd85c authored by Stephen Chen, committed by Peter Griess

Port TimeseriesHistogram from common/stats to folly

Summary:
Port TimeseriesHistogram from common/stats to folly, similar to the
MultiLevelTimeSeries and Histogram classes we've converted before.

Test Plan:
Ported the old unittest for TimeseriesHistogram from common/stats to
folly/test. The original unittest is left intact and still passes. Ran
all tests in folly/test and common/stats.

Reviewed By: simpkins@fb.com

FB internal diff: D1054291
parent 2d980c3d
@@ -30,11 +30,15 @@
#include "folly/stats/MultiLevelTimeSeries.h"
#include "folly/stats/MultiLevelTimeSeries-defs.h"
#include "folly/stats/TimeseriesHistogram.h"
#include "folly/stats/TimeseriesHistogram-defs.h"
namespace folly {
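// Explicit instantiations of the common int64_t specializations, so that
// code using these stats classes does not have to instantiate the templates
// in every translation unit.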
template class BucketedTimeSeries<int64_t>;
template class Histogram<int64_t>;
template class detail::HistogramBuckets<int64_t, Histogram<int64_t>::Bucket>;
template class MultiLevelTimeSeries<int64_t>;
template class TimeseriesHistogram<int64_t>;
} // folly
@@ -21,6 +21,7 @@
#include <string>
#include <vector>
#include <glog/logging.h>
#include "folly/stats/BucketedTimeSeries.h"
namespace folly {
......
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_TIMESERIES_HISTOGRAM_DEF_H_
#define FOLLY_TIMESERIES_HISTOGRAM_DEF_H_
#include "folly/Conv.h"
#include "folly/stats/Histogram-defs.h"
#include "folly/stats/MultiLevelTimeSeries-defs.h"
#include "folly/stats/BucketedTimeSeries-defs.h"
namespace folly {
template <class T, class TT, class C>
template <typename ReturnType>
ReturnType TimeseriesHistogram<T, TT, C>::avg(int level) const {
ValueType total = ValueType();
int64_t count = 0;
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
const auto& levelObj = buckets_.getByIndex(b).getLevel(level);
total += levelObj.sum();
count += levelObj.count();
}
return folly::detail::avgHelper<ReturnType>(total, count);
}
template <class T, class TT, class C>
template <typename ReturnType>
ReturnType TimeseriesHistogram<T, TT, C>::avg(TimeType start,
TimeType end) const {
ValueType total = ValueType();
int64_t count = 0;
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
    const auto& levelObj = buckets_.getByIndex(b).getLevel(start);
total += levelObj.sum(start, end);
count += levelObj.count(start, end);
}
return folly::detail::avgHelper<ReturnType>(total, count);
}
template <class T, class TT, class C>
template <typename ReturnType>
ReturnType TimeseriesHistogram<T, TT, C>::rate(TimeType start,
TimeType end) const {
ValueType total = ValueType();
TimeType elapsed(0);
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
const auto& level = buckets_.getByIndex(b).getLevel(start);
total += level.sum(start, end);
elapsed = std::max(elapsed, level.elapsed(start, end));
}
return folly::detail::rateHelper<ReturnType, TimeType, TimeType>(
total, elapsed);
}
template <typename T, typename TT, typename C>
TimeseriesHistogram<T, TT, C>::TimeseriesHistogram(ValueType bucketSize,
ValueType min,
ValueType max,
const ContainerType& copyMe)
: buckets_(bucketSize, min, max, copyMe),
haveNotSeenValue_(true),
singleUniqueValue_(false) {
}
template <typename T, typename TT, typename C>
void TimeseriesHistogram<T, TT, C>::addValue(TimeType now,
const ValueType& value) {
buckets_.getByValue(value).addValue(now, value);
maybeHandleSingleUniqueValue(value);
}
template <typename T, typename TT, typename C>
void TimeseriesHistogram<T, TT, C>::addValue(TimeType now,
const ValueType& value,
int64_t times) {
buckets_.getByValue(value).addValue(now, value, times);
maybeHandleSingleUniqueValue(value);
}
template <typename T, typename TT, typename C>
void TimeseriesHistogram<T, TT, C>::addValues(
TimeType now, const folly::Histogram<ValueType>& hist) {
CHECK_EQ(hist.getMin(), getMin());
CHECK_EQ(hist.getMax(), getMax());
CHECK_EQ(hist.getBucketSize(), getBucketSize());
CHECK_EQ(hist.getNumBuckets(), getNumBuckets());
for (unsigned int n = 0; n < hist.getNumBuckets(); ++n) {
const typename folly::Histogram<ValueType>::Bucket& histBucket =
hist.getBucketByIndex(n);
Bucket& myBucket = buckets_.getByIndex(n);
myBucket.addValueAggregated(now, histBucket.sum, histBucket.count);
}
// We don't bother with the singleUniqueValue_ tracking.
haveNotSeenValue_ = false;
singleUniqueValue_ = false;
}
template <typename T, typename TT, typename C>
void TimeseriesHistogram<T, TT, C>::maybeHandleSingleUniqueValue(
const ValueType& value) {
if (haveNotSeenValue_) {
firstValue_ = value;
singleUniqueValue_ = true;
haveNotSeenValue_ = false;
} else if (singleUniqueValue_) {
if (value != firstValue_) {
singleUniqueValue_ = false;
}
}
}
template <typename T, typename TT, typename C>
T TimeseriesHistogram<T, TT, C>::getPercentileEstimate(double pct,
int level) const {
if (singleUniqueValue_) {
return firstValue_;
}
return buckets_.getPercentileEstimate(pct / 100.0, CountFromLevel(level),
AvgFromLevel(level));
}
template <typename T, typename TT, typename C>
T TimeseriesHistogram<T, TT, C>::getPercentileEstimate(double pct,
TimeType start,
TimeType end) const {
if (singleUniqueValue_) {
return firstValue_;
}
return buckets_.getPercentileEstimate(pct / 100.0,
CountFromInterval(start, end),
AvgFromInterval<T>(start, end));
}
template <typename T, typename TT, typename C>
int TimeseriesHistogram<T, TT, C>::getPercentileBucketIdx(
double pct,
int level
) const {
return buckets_.getPercentileBucketIdx(pct / 100.0, CountFromLevel(level));
}
template <typename T, typename TT, typename C>
int TimeseriesHistogram<T, TT, C>::getPercentileBucketIdx(double pct,
TimeType start,
TimeType end) const {
return buckets_.getPercentileBucketIdx(pct / 100.0,
CountFromInterval(start, end));
}
template <typename T, typename TT, typename C>
T TimeseriesHistogram<T, TT, C>::rate(int level) const {
ValueType total = ValueType();
TimeType elapsed(0);
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
const auto& levelObj = buckets_.getByIndex(b).getLevel(level);
total += levelObj.sum();
elapsed = std::max(elapsed, levelObj.elapsed());
}
return elapsed == TimeType(0) ? 0 : (total / elapsed.count());
}
template <typename T, typename TT, typename C>
void TimeseriesHistogram<T, TT, C>::clear() {
for (int i = 0; i < buckets_.getNumBuckets(); i++) {
buckets_.getByIndex(i).clear();
}
}
template <typename T, typename TT, typename C>
void TimeseriesHistogram<T, TT, C>::update(TimeType now) {
for (int i = 0; i < buckets_.getNumBuckets(); i++) {
buckets_.getByIndex(i).update(now);
}
}
template <typename T, typename TT, typename C>
std::string TimeseriesHistogram<T, TT, C>::getString(int level) const {
std::string result;
for (int i = 0; i < buckets_.getNumBuckets(); i++) {
if (i > 0) {
toAppend(",", &result);
}
const ContainerType& cont = buckets_.getByIndex(i);
toAppend(buckets_.getBucketMin(i),
":", cont.count(level),
":", cont.avg<ValueType>(level), &result);
}
return result;
}
template <typename T, typename TT, typename C>
std::string TimeseriesHistogram<T, TT, C>::getString(TimeType start,
TimeType end) const {
std::string result;
for (int i = 0; i < buckets_.getNumBuckets(); i++) {
if (i > 0) {
toAppend(",", &result);
}
const ContainerType& cont = buckets_.getByIndex(i);
toAppend(buckets_.getBucketMin(i),
":", cont.count(start, end),
":", cont.avg(start, end), &result);
}
return result;
}
} // namespace folly
#endif
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_TIMESERIES_HISTOGRAM_H_
#define FOLLY_TIMESERIES_HISTOGRAM_H_
#include <chrono>
#include <limits>
#include <string>
#include <boost/static_assert.hpp>
#include "folly/stats/Histogram.h"
#include "folly/stats/MultiLevelTimeSeries.h"
namespace folly {
/*
* TimeseriesHistogram tracks data distributions as they change over time.
*
* Specifically, it is a bucketed histogram with different value ranges assigned
* to each bucket. Within each bucket is a MultiLevelTimeSeries from
* 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
* different set of data for different historical time periods, and one can
* query data distributions over different trailing time windows.
*
 * For example, this can answer questions like: "What is the data distribution
* the last minute? Over the last 10 minutes? Since I last cleared this
* histogram?"
*
* The class can also estimate percentiles and answer questions like: "What was
* the 99th percentile data value over the last 10 minutes?"
*
 * Note that depending on the size of your buckets and the smoothness
 * of your data distribution, the estimate may be way off from the actual
 * value. In particular, if the given percentile falls outside of the bucket
 * range (e.g. your buckets cover 0 - 100,000 but the 99th percentile is
 * around 115,000), this estimate may be very wrong.
*
* The memory usage for a typical histogram is roughly 3k * (# of buckets). All
* insertion operations are amortized O(1), and all queries are O(# of buckets).
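 *
 * A minimal usage sketch (values illustrative, mirroring the unit tests
 * below):
 *
 *   // Minute, hour, and all-time levels; 60 slots per level; 10-wide
 *   // value buckets covering [0, 100), plus under and over buckets.
 *   const std::chrono::seconds kDurations[] = {
 *       std::chrono::seconds(60), std::chrono::seconds(3600),
 *       std::chrono::seconds(0)};
 *   folly::TimeseriesHistogram<int> hist(
 *       10, 0, 100, folly::MultiLevelTimeSeries<int>(60, 3, kDurations));
 *   hist.addValue(std::chrono::seconds(time(nullptr)), 42);
 *   hist.update(std::chrono::seconds(time(nullptr)));  // decay before reading
 *   int p99 = hist.getPercentileEstimate(99.0, 2 /* all-time level */);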
*/
template <class T, class TT=std::chrono::seconds,
class C=folly::MultiLevelTimeSeries<T, TT>>
class TimeseriesHistogram {
private:
  // NOTE: T must be a signed numeric type for our math to work.
BOOST_STATIC_ASSERT(std::numeric_limits<T>::is_signed);
public:
// values to be inserted into container
typedef T ValueType;
// the container type we use internally for each bucket
typedef C ContainerType;
// The time type.
typedef TT TimeType;
/*
 * Create a TimeseriesHistogram and initialize the bucketing and levels.
*
* The buckets are created by chopping the range [min, max) into pieces
* of size bucketSize, with the last bucket being potentially shorter. Two
* additional buckets are always created -- the "under" bucket for the range
* (-inf, min) and the "over" bucket for the range [max, +inf).
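 *
 * For example, (bucketSize = 10, min = 10, max = 110) produces the value
 * ranges [10,20), [20,30), ..., [100,110), plus the under and over buckets:
 * 12 buckets in total, as the unit tests below verify.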
*
* @param bucketSize the width of each bucket
 * @param min the smallest value for the bucket range
* @param max the largest value for the bucket range
* @param defaultContainer a pre-initialized timeseries with the desired
* number of levels and their durations.
*/
TimeseriesHistogram(ValueType bucketSize, ValueType min, ValueType max,
const ContainerType& defaultContainer);
/* Return the bucket size of each bucket in the histogram. */
ValueType getBucketSize() const { return buckets_.getBucketSize(); }
/* Return the min value at which bucketing begins. */
ValueType getMin() const { return buckets_.getMin(); }
/* Return the max value at which bucketing ends. */
ValueType getMax() const { return buckets_.getMax(); }
/* Return the number of levels of the Timeseries object in each bucket */
int getNumLevels() const {
return buckets_.getByIndex(0).numLevels();
}
/* Return the number of buckets */
int getNumBuckets() const { return buckets_.getNumBuckets(); }
/* Return the bucket index into which the given value would fall. */
int getBucketIdx(const ValueType& value) const;
/*
* Return the threshold of the bucket for the given index in range
* [0..numBuckets). The bucket will have range [thresh, thresh + bucketSize)
* or [thresh, max), whichever is shorter.
*/
ValueType getBucketMin(int bucketIdx) const {
return buckets_.getBucketMin(bucketIdx);
}
/* Return the actual timeseries in the given bucket (for reading only!) */
const ContainerType& getBucket(int bucketIdx) const {
return buckets_.getByIndex(bucketIdx);
}
/* Total count of values at the given timeseries level (all buckets). */
int64_t count(int level) const {
int64_t total = 0;
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
total += buckets_.getByIndex(b).count(level);
}
return total;
}
/* Total count of values added during the given interval (all buckets). */
int64_t count(TimeType start, TimeType end) const {
int64_t total = 0;
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
total += buckets_.getByIndex(b).count(start, end);
}
return total;
}
/* Total sum of values at the given timeseries level (all buckets). */
ValueType sum(int level) const {
ValueType total = ValueType();
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
total += buckets_.getByIndex(b).sum(level);
}
return total;
}
/* Total sum of values added during the given interval (all buckets). */
ValueType sum(TimeType start, TimeType end) const {
ValueType total = ValueType();
for (int b = 0; b < buckets_.getNumBuckets(); ++b) {
total += buckets_.getByIndex(b).sum(start, end);
}
return total;
}
/* Average of values at the given timeseries level (all buckets). */
template <typename ReturnType=double>
ReturnType avg(int level) const;
/* Average of values added during the given interval (all buckets). */
template <typename ReturnType=double>
ReturnType avg(TimeType start, TimeType end) const;
/*
* Rate at the given timeseries level (all buckets).
* This is the sum of all values divided by the time interval (in seconds).
*/
ValueType rate(int level) const;
/*
* Rate for the given interval (all buckets).
* This is the sum of all values divided by the time interval (in seconds).
*/
template <typename ReturnType=double>
ReturnType rate(TimeType start, TimeType end) const;
/*
* Update every underlying timeseries object with the given timestamp. You
* must call this directly before querying to ensure that the data in all
* buckets is decayed properly.
*/
void update(TimeType now);
  /* Clear all the data from the histogram. */
void clear();
/* Add a value into the histogram with timestamp 'now' */
void addValue(TimeType now, const ValueType& value);
/* Add a value the given number of times with timestamp 'now' */
void addValue(TimeType now, const ValueType& value, int64_t times);
/*
* Add all of the values from the specified histogram.
*
* All of the values will be added to the current time-slot.
*
* One use of this is for thread-local caching of frequently updated
* histogram data. For example, each thread can store a thread-local
* Histogram that is updated frequently, and only add it to the global
* TimeseriesHistogram once a second.
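 *
 * A sketch of that pattern (names hypothetical; the local Histogram must
 * use the same bucketSize, min, and max as this TimeseriesHistogram, per
 * the CHECK_EQs in addValues):
 *
 *   folly::Histogram<int64_t> local(10, 0, 100);  // thread-local, unlocked
 *   local.addValue(sample);                       // hot path
 *   // ...roughly once a second, merge into the shared histogram:
 *   globalHist.addValues(std::chrono::seconds(time(nullptr)), local);
 *   local.clear();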
*/
void addValues(TimeType now, const folly::Histogram<ValueType>& values);
/*
* Return an estimate of the value at the given percentile in the histogram
* in the given timeseries level. The percentile is estimated as follows:
*
* - We retrieve a count of the values in each bucket (at the given level)
* - We determine via the counts which bucket the given percentile falls in.
* - We assume the average value in the bucket is also its median
* - We then linearly interpolate within the bucket, by assuming that the
* distribution is uniform in the two value ranges [left, median) and
* [median, right) where [left, right) is the bucket value range.
*
* Caveats:
* - If the histogram is empty, this always returns ValueType(), usually 0.
* - For the 'under' and 'over' special buckets, their range is unbounded
* on one side. In order for the interpolation to work, we assume that
* the average value in the bucket is equidistant from the two edges of
* the bucket. In other words, we assume that the distance between the
* average and the known bound is equal to the distance between the average
* and the unknown bound.
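 *
 * As a worked example of the interpolation: suppose the percentile falls
 * into the bucket [10, 20) whose average value is 12. Half of the bucket's
 * count is assumed uniform over [10, 12) and half over [12, 20), so a
 * percentile landing 25% of the way into the bucket's count is estimated
 * as 10 + (12 - 10) * (0.25 / 0.5) = 11.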
*/
ValueType getPercentileEstimate(double pct, int level) const;
/*
* Return an estimate of the value at the given percentile in the histogram
* in the given historical interval. Please see the documentation for
 * getPercentileEstimate(double pct, int level) for the explanation of the
* estimation algorithm.
*/
ValueType getPercentileEstimate(double pct, TimeType start, TimeType end)
const;
/*
* Return the bucket index that the given percentile falls into (in the
* given timeseries level). This index can then be used to retrieve either
* the bucket threshold, or other data from inside the bucket.
*/
int getPercentileBucketIdx(double pct, int level) const;
/*
* Return the bucket index that the given percentile falls into (in the
* given historical interval). This index can then be used to retrieve either
* the bucket threshold, or other data from inside the bucket.
*/
int getPercentileBucketIdx(double pct, TimeType start, TimeType end) const;
/* Get the bucket threshold for the bucket containing the given pct. */
int getPercentileBucketMin(double pct, int level) const {
return getBucketMin(getPercentileBucketIdx(pct, level));
}
/* Get the bucket threshold for the bucket containing the given pct. */
int getPercentileBucketMin(double pct, TimeType start, TimeType end) const {
return getBucketMin(getPercentileBucketIdx(pct, start, end));
}
/*
* Print out serialized data from all buckets at the given level.
* Format is: BUCKET [',' BUCKET ...]
* Where: BUCKET == bucketMin ':' count ':' avg
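 * For example, two buckets starting at 10 and 20, holding 8 values each
 * with averages 13 and 24, serialize as "10:8:13,20:8:24" (cf. the expected
 * strings in the String unit test below).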
*/
std::string getString(int level) const;
/*
* Print out serialized data for all buckets in the historical interval.
* For format, please see getString(int level).
*/
std::string getString(TimeType start, TimeType end) const;
private:
typedef ContainerType Bucket;
struct CountFromLevel {
explicit CountFromLevel(int level) : level_(level) {}
uint64_t operator()(const ContainerType& bucket) const {
return bucket.count(level_);
}
private:
int level_;
};
struct CountFromInterval {
explicit CountFromInterval(TimeType start, TimeType end)
: start_(start),
end_(end) {}
uint64_t operator()(const ContainerType& bucket) const {
return bucket.count(start_, end_);
}
private:
TimeType start_;
TimeType end_;
};
struct AvgFromLevel {
explicit AvgFromLevel(int level) : level_(level) {}
ValueType operator()(const ContainerType& bucket) const {
return bucket.template avg<ValueType>(level_);
}
private:
int level_;
};
template <typename ReturnType>
struct AvgFromInterval {
explicit AvgFromInterval(TimeType start, TimeType end)
: start_(start),
end_(end) {}
ReturnType operator()(const ContainerType& bucket) const {
return bucket.template avg<ReturnType>(start_, end_);
}
private:
TimeType start_;
TimeType end_;
};
/*
* Special logic for the case of only one unique value registered
* (this can happen when clients don't pick good bucket ranges or have
* other bugs). It's a lot easier for clients to track down these issues
* if they are getting the correct value.
*/
void maybeHandleSingleUniqueValue(const ValueType& value);
folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
bool haveNotSeenValue_;
bool singleUniqueValue_;
ValueType firstValue_;
};
} // folly
#endif // FOLLY_TIMESERIES_HISTOGRAM_H_
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "folly/stats/TimeseriesHistogram.h"
#include "folly/stats/TimeseriesHistogram-defs.h"
#include <gtest/gtest.h>
#include <ctime>
#include <random>
using namespace std;
using namespace folly;
using std::chrono::seconds;
namespace IntMTMHTS {
enum Levels {
MINUTE,
TEN_MINUTE,
HOUR,
ALLTIME,
NUM_LEVELS,
};
const seconds kDurations[] = {
seconds(60), seconds(600), seconds(3600), seconds(0)
};
} // namespace IntMTMHTS
namespace IntMHTS {
enum Levels {
MINUTE,
HOUR,
ALLTIME,
NUM_LEVELS,
};
const seconds kDurations[] = {
seconds(60), seconds(3600), seconds(0)
};
} // namespace IntMHTS
typedef std::mt19937 RandomInt32;
TEST(TimeseriesHistogram, Percentile) {
RandomInt32 random(5);
// [10, 109], 12 buckets including above and below
{
TimeseriesHistogram<int> h(10, 10, 110,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS,
IntMTMHTS::kDurations));
EXPECT_EQ(0, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME));
EXPECT_EQ(12, h.getNumBuckets());
EXPECT_EQ(10, h.getBucketSize());
EXPECT_EQ(10, h.getMin());
EXPECT_EQ(110, h.getMax());
for (int i = 0; i < h.getNumBuckets(); ++i) {
EXPECT_EQ(4, h.getBucket(i).numLevels());
}
int maxVal = 120;
h.addValue(seconds(0), 0);
h.addValue(seconds(0), maxVal);
for (int i = 0; i < 98; i++) {
h.addValue(seconds(0), random() % maxVal);
}
h.update(std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch()));
// bucket 0 stores everything below min, so its minimum
// is the lowest possible number
EXPECT_EQ(std::numeric_limits<int>::min(),
h.getPercentileBucketMin(1, IntMTMHTS::ALLTIME));
EXPECT_EQ(110, h.getPercentileBucketMin(99, IntMTMHTS::ALLTIME));
EXPECT_EQ(-2, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME));
EXPECT_EQ(-1, h.getPercentileEstimate(1, IntMTMHTS::ALLTIME));
EXPECT_EQ(119, h.getPercentileEstimate(99, IntMTMHTS::ALLTIME));
EXPECT_EQ(120, h.getPercentileEstimate(100, IntMTMHTS::ALLTIME));
}
}
TEST(TimeseriesHistogram, String) {
RandomInt32 random(5);
// [10, 109], 12 buckets including above and below
{
TimeseriesHistogram<int> hist(10, 10, 110,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS,
IntMTMHTS::kDurations));
int maxVal = 120;
hist.addValue(seconds(0), 0);
hist.addValue(seconds(0), maxVal);
for (int i = 0; i < 98; i++) {
hist.addValue(seconds(0), random() % maxVal);
}
hist.update(seconds(0));
const char* const kStringValues1[IntMTMHTS::NUM_LEVELS] = {
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
};
CHECK_EQ(IntMTMHTS::NUM_LEVELS, hist.getNumLevels());
for (int level = 0; level < hist.getNumLevels(); ++level) {
EXPECT_EQ(kStringValues1[level], hist.getString(level));
}
const char* const kStringValues2[IntMTMHTS::NUM_LEVELS] = {
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
"-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
"70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
};
CHECK_EQ(IntMTMHTS::NUM_LEVELS, hist.getNumLevels());
for (int level = 0; level < hist.getNumLevels(); ++level) {
EXPECT_EQ(kStringValues2[level], hist.getString(level));
}
}
}
TEST(TimeseriesHistogram, Clear) {
{
TimeseriesHistogram<int> hist(10, 0, 100,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS,
IntMTMHTS::kDurations));
for (int now = 0; now < 3600; now++) {
for (int i = 0; i < 100; i++) {
hist.addValue(seconds(now), i, 2); // adds each item 2 times
}
}
// check clearing
hist.clear();
for (int b = 0; b < hist.getNumBuckets(); ++b) {
EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::MINUTE));
EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::HOUR));
EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
}
for (int pct = 0; pct <= 100; pct++) {
EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::MINUTE));
EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::HOUR));
EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::ALLTIME));
}
}
}
TEST(TimeseriesHistogram, Basic) {
{
TimeseriesHistogram<int> hist(10, 0, 100,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS,
IntMTMHTS::kDurations));
for (int now = 0; now < 3600; now++) {
for (int i = 0; i < 100; i++) {
hist.addValue(seconds(now), i);
}
}
hist.update(seconds(3599));
for (int pct = 1; pct <= 100; pct++) {
int expected = (pct - 1) / 10 * 10;
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
}
for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE));
EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR));
EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
}
EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
IntMTMHTS::MINUTE));
}
// -----------------
{
TimeseriesHistogram<int> hist(10, 0, 100,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS,
IntMTMHTS::kDurations));
for (int now = 0; now < 3600; now++) {
for (int i = 0; i < 100; i++) {
hist.addValue(seconds(now), i, 2); // adds each item 2 times
}
}
hist.update(seconds(3599));
for (int pct = 1; pct <= 100; pct++) {
int expected = (pct - 1) / 10 * 10;
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
}
for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
EXPECT_EQ(600 * 2, hist.getBucket(b).count(IntMTMHTS::MINUTE));
EXPECT_EQ(6000 * 2, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::HOUR));
EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
}
EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
IntMTMHTS::MINUTE));
}
// -----------------
{
TimeseriesHistogram<int> hist(10, 0, 100,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS,
IntMTMHTS::kDurations));
for (int now = 0; now < 3600; now++) {
for (int i = 0; i < 50; i++) {
hist.addValue(seconds(now), i * 2, 2); // adds each item 2 times
}
}
hist.update(seconds(3599));
for (int pct = 1; pct <= 100; pct++) {
int expected = (pct - 1) / 10 * 10;
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
}
EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::HOUR));
EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::ALLTIME));
EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
IntMTMHTS::MINUTE));
EXPECT_EQ(0,
hist.getBucket(hist.getNumBuckets() - 1).
count(IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
IntMTMHTS::HOUR));
EXPECT_EQ(0,
hist.getBucket(hist.getNumBuckets() - 1).count(
IntMTMHTS::ALLTIME));
for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE));
EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR));
EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
}
for (int i = 0; i < 100; ++i) {
hist.addValue(seconds(3599), 200 + i);
}
hist.update(seconds(3599));
EXPECT_EQ(100,
hist.getBucket(hist.getNumBuckets() - 1).count(
IntMTMHTS::ALLTIME));
}
}
TEST(TimeseriesHistogram, QueryByInterval) {
TimeseriesHistogram<int> mhts(8, 8, 120,
MultiLevelTimeSeries<int>(
60, IntMHTS::NUM_LEVELS,
IntMHTS::kDurations));
mhts.update(seconds(0));
int curTime;
for (curTime = 0; curTime < 7200; curTime++) {
mhts.addValue(seconds(curTime), 1);
}
for (curTime = 7200; curTime < 7200 + 3540; curTime++) {
mhts.addValue(seconds(curTime), 10);
}
for (curTime = 7200 + 3540; curTime < 7200 + 3600; curTime++) {
mhts.addValue(seconds(curTime), 100);
}
mhts.update(seconds(7200 + 3600 - 1));
struct TimeInterval {
TimeInterval(int s, int e)
: start(s), end(e) {}
std::chrono::seconds start;
std::chrono::seconds end;
};
TimeInterval intervals[12] = {
{ curTime - 60, curTime },
{ curTime - 3600, curTime },
{ curTime - 7200, curTime },
{ curTime - 3600, curTime - 60 },
{ curTime - 7200, curTime - 60 },
{ curTime - 7200, curTime - 3600 },
{ curTime - 50, curTime - 20 },
{ curTime - 3020, curTime - 20 },
{ curTime - 7200, curTime - 20 },
{ curTime - 3000, curTime - 1000 },
{ curTime - 7200, curTime - 1000 },
{ curTime - 7200, curTime - 3600 },
};
int expectedSums[12] = {
6000, 41400, 32400, 35400, 32129, 16200, 3000, 33600, 32308, 20000, 27899,
16200
};
int expectedCounts[12] = {
60, 3600, 7200, 3540, 7139, 3600, 30, 3000, 7178, 2000, 6199, 3600
};
// The first 7200 values added all fell below the histogram minimum,
// and went into the bucket that tracks all of the too-small values.
// This bucket reports a minimum value of the smallest possible integer.
int belowMinBucket = std::numeric_limits<int>::min();
int expectedValues[12][3] = {
{96, 96, 96},
{ 8, 8, 96},
{ belowMinBucket, belowMinBucket, 8}, // alltime
{ 8, 8, 8},
{ belowMinBucket, belowMinBucket, 8}, // alltime
{ belowMinBucket, belowMinBucket, 8}, // alltime
{96, 96, 96},
{ 8, 8, 96},
{ belowMinBucket, belowMinBucket, 8}, // alltime
{ 8, 8, 8},
{ belowMinBucket, belowMinBucket, 8}, // alltime
{ belowMinBucket, belowMinBucket, 8} // alltime
};
for (int i = 0; i < 12; i++) {
const auto& itv = intervals[i];
int s = mhts.sum(itv.start, itv.end);
EXPECT_EQ(expectedSums[i], s);
int c = mhts.count(itv.start, itv.end);
EXPECT_EQ(expectedCounts[i], c);
}
// 3 levels
for (int i = 1; i <= 100; i++) {
EXPECT_EQ(96, mhts.getPercentileBucketMin(i, 0));
EXPECT_EQ(96, mhts.getPercentileBucketMin(i, seconds(curTime - 60),
seconds(curTime)));
EXPECT_EQ(8, mhts.getPercentileBucketMin(i, seconds(curTime - 3540),
seconds(curTime - 60)));
}
EXPECT_EQ(8, mhts.getPercentileBucketMin(1, 1));
EXPECT_EQ(8, mhts.getPercentileBucketMin(98, 1));
EXPECT_EQ(96, mhts.getPercentileBucketMin(99, 1));
EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 1));
EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(1, 2));
EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(66, 2));
EXPECT_EQ(8, mhts.getPercentileBucketMin(67, 2));
EXPECT_EQ(8, mhts.getPercentileBucketMin(99, 2));
EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 2));
  // The below-min bucket reports the smallest possible integer as its min.
for (int i = 0; i < 12; i++) {
const auto& itv = intervals[i];
int v = mhts.getPercentileBucketMin(1, itv.start, itv.end);
EXPECT_EQ(expectedValues[i][0], v);
v = mhts.getPercentileBucketMin(50, itv.start, itv.end);
EXPECT_EQ(expectedValues[i][1], v);
v = mhts.getPercentileBucketMin(99, itv.start, itv.end);
EXPECT_EQ(expectedValues[i][2], v);
}
for (int i = 0; i < 12; i++) {
const auto& itv = intervals[i];
// Some of the older intervals that fall in the alltime bucket
// are off by 1 or 2 in their estimated counts.
size_t tolerance = 0;
if (itv.start <= seconds(curTime - 7200)) {
tolerance = 2;
} else if (itv.start <= seconds(curTime - 3000)) {
tolerance = 1;
}
size_t actualCount = (itv.end - itv.start).count();
size_t estimatedCount = mhts.count(itv.start, itv.end);
EXPECT_GE(actualCount, estimatedCount);
EXPECT_LE(actualCount - tolerance, estimatedCount);
}
}
TEST(TimeseriesHistogram, SingleUniqueValue) {
int values[] = {-1, 0, 500, 1000, 1500};
for (int ii = 0; ii < 5; ++ii) {
int value = values[ii];
TimeseriesHistogram<int> h(10, 0, 1000,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS,
IntMTMHTS::kDurations));
const int kNumIters = 1000;
for (int jj = 0; jj < kNumIters; ++jj) {
h.addValue(seconds(time(nullptr)), value);
}
h.update(seconds(time(nullptr)));
// since we've only added one unique value, all percentiles should
// be that value
EXPECT_EQ(h.getPercentileEstimate(10, 0), value);
EXPECT_EQ(h.getPercentileEstimate(50, 0), value);
EXPECT_EQ(h.getPercentileEstimate(99, 0), value);
// Things get trickier if there are multiple unique values.
const int kNewValue = 750;
for (int kk = 0; kk < 2*kNumIters; ++kk) {
h.addValue(seconds(time(nullptr)), kNewValue);
}
h.update(seconds(time(nullptr)));
EXPECT_NEAR(h.getPercentileEstimate(50, 0), kNewValue+5, 5);
if (value >= 0 && value <= 1000) {
// only do further testing if value is within our bucket range,
// else estimates can be wildly off
if (kNewValue > value) {
EXPECT_NEAR(h.getPercentileEstimate(10, 0), value+5, 5);
EXPECT_NEAR(h.getPercentileEstimate(99, 0), kNewValue+5, 5);
} else {
EXPECT_NEAR(h.getPercentileEstimate(10, 0), kNewValue+5, 5);
EXPECT_NEAR(h.getPercentileEstimate(99, 0), value+5, 5);
}
}
}
}