Remove seperate logic for first value in histogram

Summary: Currently there is logic that keeps histograms which only have one value reported to it separately. The multi level histogram keeps the histogram for several time buckets. Just in case we had only one value added, it will keep reporting the value even if it was reported long ago and shouldn't appear in the time window level we are querying. This is inconsistent with non histogram metrics (`avg` for example) reported from the same histogram, which don't have this logic. A scenario would look like this: 1. Report value 5 to histogram 2. Immediately query the 60 second level -> `avg` reports 5, `p50` reports 5. 3. Wait 2 minutes 4. look at the 60 second level -> `avg` reports 0 (no value in the last 60 seconds) but `p50` keeps reporting 5. I think this does more harm than good, so I suggest to kill this behavior. Reviewed By: simpkins Differential Revision: D26339016 fbshipit-source-id: 120485f8b78876e60ce66a534edcf86e7d9a836d

Remove seperate logic for first value in histogram
Summary: Currently there is logic that keeps histograms which only have one value reported to it separately. The multi level histogram keeps the histogram for several time buckets. Just in case we had only one value added, it will keep reporting the value even if it was reported long ago and shouldn't appear in the time window level we are querying. This is inconsistent with non histogram metrics (`avg` for example) reported from the same histogram, which don't have this logic. A scenario would look like this: 1. Report value 5 to histogram 2. Immediately query the 60 second level -> `avg` reports 5, `p50` reports 5. 3. Wait 2 minutes 4. look at the 60 second level -> `avg` reports 0 (no value in the last 60 seconds) but `p50` keeps reporting 5. I think this does more harm than good, so I suggest to kill this behavior. Reviewed By: simpkins Differential Revision: D26339016 fbshipit-source-id: 120485f8b78876e60ce66a534edcf86e7d9a836d
57b8506e · Lior Israeli · Facebook GitHub Bot · 38c1a117 · 57b8506e · 57b8506e
Commit 57b8506e authored Feb 15, 2021 by Lior Israeli Committed by Facebook GitHub Bot Feb 15, 2021
3 changed files
--- a/folly/stats/TimeseriesHistogram-inl.h
+++ b/folly/stats/TimeseriesHistogram-inl.h
@@ -24,22 +24,18 @@ TimeseriesHistogram<T, CT, C>::TimeseriesHistogram(
    ValueType min,
    ValueType max,
    const ContainerType& copyMe)
-    : buckets_(bucketSize, min, max, copyMe),
-      haveNotSeenValue_(true),
-      singleUniqueValue_(false) {}
+    : buckets_(bucketSize, min, max, copyMe) {}

 template <typename T, typename CT, typename C>
 void TimeseriesHistogram<T, CT, C>::addValue(
    TimePoint now, const ValueType& value) {
  buckets_.getByValue(value).addValue(now, value);
-  maybeHandleSingleUniqueValue(value);
 }

 template <typename T, typename CT, typename C>
 void TimeseriesHistogram<T, CT, C>::addValue(
    TimePoint now, const ValueType& value, uint64_t times) {
  buckets_.getByValue(value).addValue(now, value, times);
-  maybeHandleSingleUniqueValue(value);
 }

 template <typename T, typename CT, typename C>
@@ -56,33 +52,11 @@ void TimeseriesHistogram<T, CT, C>::addValues(
    Bucket& myBucket = buckets_.getByIndex(n);
    myBucket.addValueAggregated(now, histBucket.sum, histBucket.count);
  }
-
-  // We don't bother with the singleUniqueValue_ tracking.
-  haveNotSeenValue_ = false;
-  singleUniqueValue_ = false;
-}
-
-template <typename T, typename CT, typename C>
-void TimeseriesHistogram<T, CT, C>::maybeHandleSingleUniqueValue(
-    const ValueType& value) {
-  if (haveNotSeenValue_) {
-    firstValue_ = value;
-    singleUniqueValue_ = true;
-    haveNotSeenValue_ = false;
-  } else if (singleUniqueValue_) {
-    if (value != firstValue_) {
-      singleUniqueValue_ = false;
-    }
-  }
 }

 template <typename T, typename CT, typename C>
 T TimeseriesHistogram<T, CT, C>::getPercentileEstimate(
    double pct, size_t level) const {
-  if (singleUniqueValue_) {
-    return firstValue_;
-  }
-
  return buckets_.getPercentileEstimate(
      pct / 100.0, CountFromLevel(level), AvgFromLevel(level));
 }
@@ -90,10 +64,6 @@ T TimeseriesHistogram<T, CT, C>::getPercentileEstimate(
 template <typename T, typename CT, typename C>
 T TimeseriesHistogram<T, CT, C>::getPercentileEstimate(
    double pct, TimePoint start, TimePoint end) const {
-  if (singleUniqueValue_) {
-    return firstValue_;
-  }
-
  return buckets_.getPercentileEstimate(
      pct / 100.0,
      CountFromInterval(start, end),

--- a/folly/stats/TimeseriesHistogram.h
+++ b/folly/stats/TimeseriesHistogram.h
@@ -358,14 +358,6 @@ class TimeseriesHistogram {
    TimePoint end_;
  };

-  /*
-   * Special logic for the case of only one unique value registered
-   * (this can happen when clients don't pick good bucket ranges or have
-   * other bugs).  It's a lot easier for clients to track down these issues
-   * if they are getting the correct value.
-   */
-  void maybeHandleSingleUniqueValue(const ValueType& value);
-
  void computeAvgData(ValueType* total, uint64_t* nsamples, size_t level) const;
  void computeAvgData(
      ValueType* total,
@@ -380,8 +372,6 @@ class TimeseriesHistogram {
      TimePoint end) const;

  folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
-  bool haveNotSeenValue_;
-  bool singleUniqueValue_;
  ValueType firstValue_;
 };
 } // namespace folly

--- a/folly/stats/test/TimeseriesHistogramTest.cpp
+++ b/folly/stats/test/TimeseriesHistogramTest.cpp
@@ -532,46 +532,3 @@ TEST(TimeseriesHistogram, QueryByInterval) {
    EXPECT_LE(actualCount - tolerance, estimatedCount);
  }
 }
-
-TEST(TimeseriesHistogram, SingleUniqueValue) {
-  int values[] = {-1, 0, 500, 1000, 1500};
-  for (int ii = 0; ii < 5; ++ii) {
-    int value = values[ii];
-    TimeseriesHistogram<int> h(
-        10,
-        0,
-        1000,
-        MultiLevelTimeSeries<int>(
-            60, IntMTMHTS::NUM_LEVELS, IntMTMHTS::kDurations));
-
-    const int kNumIters = 1000;
-    for (int jj = 0; jj < kNumIters; ++jj) {
-      h.addValue(mkTimePoint(1), value);
-    }
-    h.update(mkTimePoint(1));
-    // since we've only added one unique value, all percentiles should
-    // be that value
-    EXPECT_EQ(h.getPercentileEstimate(10, 0), value);
-    EXPECT_EQ(h.getPercentileEstimate(50, 0), value);
-    EXPECT_EQ(h.getPercentileEstimate(99, 0), value);
-
-    // Things get trickier if there are multiple unique values.
-    const int kNewValue = 750;
-    for (int kk = 0; kk < 2 * kNumIters; ++kk) {
-      h.addValue(mkTimePoint(1), kNewValue);
-    }
-    h.update(mkTimePoint(1));
-    EXPECT_NEAR(h.getPercentileEstimate(50, 0), kNewValue + 5, 5);
-    if (value >= 0 && value <= 1000) {
-      // only do further testing if value is within our bucket range,
-      // else estimates can be wildly off
-      if (kNewValue > value) {
-        EXPECT_NEAR(h.getPercentileEstimate(10, 0), value + 5, 5);
-        EXPECT_NEAR(h.getPercentileEstimate(99, 0), kNewValue + 5, 5);
-      } else {
-        EXPECT_NEAR(h.getPercentileEstimate(10, 0), kNewValue + 5, 5);
-        EXPECT_NEAR(h.getPercentileEstimate(99, 0), value + 5, 5);
-      }
-    }
-  }
-}