Commit 57b8506e authored by Lior Israeli's avatar Lior Israeli Committed by Facebook GitHub Bot

Remove seperate logic for first value in histogram

Summary:
Currently there is logic that keeps histograms which only have one value reported to it separately.

The multi level histogram keeps the histogram for several time buckets. Just in case we had only one value added, it will keep reporting the value even if it was reported long ago and shouldn't appear in the time window level we are querying.

This is inconsistent with non histogram metrics (`avg` for example) reported from the same histogram, which don't have this logic.

A scenario would look like this:
1. Report value 5 to histogram
2. Immediately query the 60 second level -> `avg` reports 5, `p50` reports 5.
3. Wait 2 minutes
4. look at the 60 second level -> `avg` reports 0 (no value in the last 60 seconds) but `p50` keeps reporting 5.

I think this does more harm than good, so I suggest to kill this behavior.

Reviewed By: simpkins

Differential Revision: D26339016

fbshipit-source-id: 120485f8b78876e60ce66a534edcf86e7d9a836d
parent 38c1a117
......@@ -24,22 +24,18 @@ TimeseriesHistogram<T, CT, C>::TimeseriesHistogram(
ValueType min,
ValueType max,
const ContainerType& copyMe)
: buckets_(bucketSize, min, max, copyMe),
haveNotSeenValue_(true),
singleUniqueValue_(false) {}
: buckets_(bucketSize, min, max, copyMe) {}
template <typename T, typename CT, typename C>
void TimeseriesHistogram<T, CT, C>::addValue(
TimePoint now, const ValueType& value) {
buckets_.getByValue(value).addValue(now, value);
maybeHandleSingleUniqueValue(value);
}
template <typename T, typename CT, typename C>
void TimeseriesHistogram<T, CT, C>::addValue(
TimePoint now, const ValueType& value, uint64_t times) {
buckets_.getByValue(value).addValue(now, value, times);
maybeHandleSingleUniqueValue(value);
}
template <typename T, typename CT, typename C>
......@@ -56,33 +52,11 @@ void TimeseriesHistogram<T, CT, C>::addValues(
Bucket& myBucket = buckets_.getByIndex(n);
myBucket.addValueAggregated(now, histBucket.sum, histBucket.count);
}
// We don't bother with the singleUniqueValue_ tracking.
haveNotSeenValue_ = false;
singleUniqueValue_ = false;
}
template <typename T, typename CT, typename C>
void TimeseriesHistogram<T, CT, C>::maybeHandleSingleUniqueValue(
const ValueType& value) {
if (haveNotSeenValue_) {
firstValue_ = value;
singleUniqueValue_ = true;
haveNotSeenValue_ = false;
} else if (singleUniqueValue_) {
if (value != firstValue_) {
singleUniqueValue_ = false;
}
}
}
template <typename T, typename CT, typename C>
T TimeseriesHistogram<T, CT, C>::getPercentileEstimate(
double pct, size_t level) const {
if (singleUniqueValue_) {
return firstValue_;
}
return buckets_.getPercentileEstimate(
pct / 100.0, CountFromLevel(level), AvgFromLevel(level));
}
......@@ -90,10 +64,6 @@ T TimeseriesHistogram<T, CT, C>::getPercentileEstimate(
template <typename T, typename CT, typename C>
T TimeseriesHistogram<T, CT, C>::getPercentileEstimate(
double pct, TimePoint start, TimePoint end) const {
if (singleUniqueValue_) {
return firstValue_;
}
return buckets_.getPercentileEstimate(
pct / 100.0,
CountFromInterval(start, end),
......
......@@ -358,14 +358,6 @@ class TimeseriesHistogram {
TimePoint end_;
};
/*
* Special logic for the case of only one unique value registered
* (this can happen when clients don't pick good bucket ranges or have
* other bugs). It's a lot easier for clients to track down these issues
* if they are getting the correct value.
*/
void maybeHandleSingleUniqueValue(const ValueType& value);
void computeAvgData(ValueType* total, uint64_t* nsamples, size_t level) const;
void computeAvgData(
ValueType* total,
......@@ -380,8 +372,6 @@ class TimeseriesHistogram {
TimePoint end) const;
folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
bool haveNotSeenValue_;
bool singleUniqueValue_;
ValueType firstValue_;
};
} // namespace folly
......
......@@ -532,46 +532,3 @@ TEST(TimeseriesHistogram, QueryByInterval) {
EXPECT_LE(actualCount - tolerance, estimatedCount);
}
}
TEST(TimeseriesHistogram, SingleUniqueValue) {
int values[] = {-1, 0, 500, 1000, 1500};
for (int ii = 0; ii < 5; ++ii) {
int value = values[ii];
TimeseriesHistogram<int> h(
10,
0,
1000,
MultiLevelTimeSeries<int>(
60, IntMTMHTS::NUM_LEVELS, IntMTMHTS::kDurations));
const int kNumIters = 1000;
for (int jj = 0; jj < kNumIters; ++jj) {
h.addValue(mkTimePoint(1), value);
}
h.update(mkTimePoint(1));
// since we've only added one unique value, all percentiles should
// be that value
EXPECT_EQ(h.getPercentileEstimate(10, 0), value);
EXPECT_EQ(h.getPercentileEstimate(50, 0), value);
EXPECT_EQ(h.getPercentileEstimate(99, 0), value);
// Things get trickier if there are multiple unique values.
const int kNewValue = 750;
for (int kk = 0; kk < 2 * kNumIters; ++kk) {
h.addValue(mkTimePoint(1), kNewValue);
}
h.update(mkTimePoint(1));
EXPECT_NEAR(h.getPercentileEstimate(50, 0), kNewValue + 5, 5);
if (value >= 0 && value <= 1000) {
// only do further testing if value is within our bucket range,
// else estimates can be wildly off
if (kNewValue > value) {
EXPECT_NEAR(h.getPercentileEstimate(10, 0), value + 5, 5);
EXPECT_NEAR(h.getPercentileEstimate(99, 0), kNewValue + 5, 5);
} else {
EXPECT_NEAR(h.getPercentileEstimate(10, 0), kNewValue + 5, 5);
EXPECT_NEAR(h.getPercentileEstimate(99, 0), value + 5, 5);
}
}
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment