Sort the digest after merge to deal with floating point accuracy

Summary: When merging centroids together, we recalculate the mean and weight. This can sometimes lead to inaccuracy due to floating point arithmetic. In a bad scenario, this can actually lead to the digest being unsorted, which triggers assertions in the code. To fix this, sort the digest at the end of merge. Since this is bounded to 100 elements that are mostly sorted, this appears to be cheap according to benchmarks. Reviewed By: yfeldblum Differential Revision: D9213936 fbshipit-source-id: 5a2978a1b759b70206aef57245bef193aed65efd

Sort the digest after merge to deal with floating point accuracy
Summary: When merging centroids together, we recalculate the mean and weight. This can sometimes lead to inaccuracy due to floating point arithmetic. In a bad scenario, this can actually lead to the digest being unsorted, which triggers assertions in the code. To fix this, sort the digest at the end of merge. Since this is bounded to 100 elements that are mostly sorted, this appears to be cheap according to benchmarks. Reviewed By: yfeldblum Differential Revision: D9213936 fbshipit-source-id: 5a2978a1b759b70206aef57245bef193aed65efd
24024c41 · Marc Celani · Facebook Github Bot · de95d5f9 · 24024c41 · 24024c41
Commit 24024c41 authored Aug 08, 2018 by Marc Celani Committed by Facebook Github Bot Aug 08, 2018
4 changed files
--- a/folly/stats/TDigest.cpp
+++ b/folly/stats/TDigest.cpp
@@ -175,6 +175,10 @@ TDigest TDigest::merge(Range<const double*> sortedValues) const {
  result.sum_ += cur.add(sumsToMerge, weightsToMerge);
  compressed.push_back(cur);
  compressed.shrink_to_fit();
+
+  // Deal with floating point precision
+  std::sort(compressed.begin(), compressed.end());
+
  result.centroids_ = std::move(compressed);
  return result;
 }
@@ -273,6 +277,9 @@ TDigest TDigest::merge(Range<const TDigest*> digests) {
  compressed.push_back(cur);
  compressed.shrink_to_fit();

+  // Deal with floating point precision
+  std::sort(compressed.begin(), compressed.end());
+
  result.count_ = count;
  result.min_ = min;
  result.max_ = max;

--- a/folly/stats/test/DigestBuilderBenchmark.cpp
+++ b/folly/stats/test/DigestBuilderBenchmark.cpp
@@ -93,19 +93,19 @@ BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x32, 10000, 32)
 * ============================================================================
 * folly/stats/test/DigestBuilderBenchmark.cpp     relative  time/iter  iters/s
 * ============================================================================
- * append(1000x1)                                              25.90ns   38.61M
- * append(1000x2)                                    99.27%    26.09ns   38.33M
- * append(1000x4)                                    99.82%    25.95ns   38.54M
- * append(1000x8)                                    98.54%    26.28ns   38.05M
- * append(1000x16)                                   84.07%    30.81ns   32.46M
- * append(1000x32)                                   82.58%    31.36ns   31.88M
+ * append(1000x1)                                              25.70ns   38.91M
+ * append(1000x2)                                    99.12%    25.93ns   38.57M
+ * append(1000x4)                                    98.62%    26.06ns   38.37M
+ * append(1000x8)                                    96.23%    26.70ns   37.45M
+ * append(1000x16)                                   88.75%    28.96ns   34.53M
+ * append(1000x32)                                   66.81%    38.46ns   26.00M
 * ----------------------------------------------------------------------------
- * append(10000x1)                                             25.34ns   39.46M
- * append(10000x2)                                   99.75%    25.41ns   39.36M
- * append(10000x4)                                   99.24%    25.54ns   39.16M
- * append(10000x8)                                  106.97%    23.69ns   42.21M
- * append(10000x16)                                  87.82%    28.86ns   34.65M
- * append(10000x32)                                  72.99%    34.72ns   28.80M
+ * append(10000x1)                                             25.39ns   39.38M
+ * append(10000x2)                                   98.81%    25.70ns   38.91M
+ * append(10000x4)                                   98.76%    25.71ns   38.90M
+ * append(10000x8)                                   98.95%    25.66ns   38.97M
+ * append(10000x16)                                  72.90%    34.83ns   28.71M
+ * append(10000x32)                                  85.15%    29.82ns   33.54M
 * ============================================================================
 */


--- a/folly/stats/test/TDigestBenchmark.cpp
+++ b/folly/stats/test/TDigestBenchmark.cpp
@@ -138,33 +138,33 @@ BENCHMARK_RELATIVE_NAMED_PARAM(estimateQuantile, 1000_p999, 1000, 0.999)
 * ============================================================================
 * folly/stats/test/TDigestBenchmark.cpp           relative  time/iter  iters/s
 * ============================================================================
- * merge(100x1)                                                 2.23us  449.36K
- * merge(100x5)                                      59.15%     3.76us  265.78K
- * merge(100x10)                                     41.72%     5.33us  187.46K
- * merge(1000x1)                                     10.18%    21.86us   45.75K
- * merge(1000x5)                                      6.34%    35.11us   28.48K
- * merge(1000x10)                                     4.45%    50.01us   19.99K
+ * merge(100x1)                                                 2.30us  434.11K
+ * merge(100x5)                                      65.52%     3.52us  284.42K
+ * merge(100x10)                                     48.66%     4.73us  211.26K
+ * merge(1000x1)                                      9.37%    24.59us   40.67K
+ * merge(1000x5)                                      6.22%    37.03us   27.00K
+ * merge(1000x10)                                     4.60%    50.03us   19.99K
 * ----------------------------------------------------------------------------
- * mergeDigests(100x10)                                        24.65us   40.57K
- * mergeDigests(100x30)                              21.03%   117.21us    8.53K
- * mergeDigests(100x60)                               8.92%   276.47us    3.62K
- * mergeDigests(1000x60)                              0.88%     2.80ms   357.15
+ * mergeDigests(100x10)                                        21.50us   46.52K
+ * mergeDigests(100x30)                              20.03%   107.34us    9.32K
+ * mergeDigests(100x60)                               8.66%   248.29us    4.03K
+ * mergeDigests(1000x60)                              0.78%     2.75ms   363.17
 * ----------------------------------------------------------------------------
- * estimateQuantile(100x1_p001)                                 9.40ns  106.40M
- * estimateQuantile(100_p01)                         63.42%    14.82ns   67.49M
- * estimateQuantile(100_p25)                         14.81%    63.47ns   15.75M
- * estimateQuantile(100_p50)                         11.26%    83.47ns   11.98M
- * estimateQuantile(100_p75)                         15.22%    61.76ns   16.19M
- * estimateQuantile(100_p99)                         76.04%    12.36ns   80.91M
- * estimateQuantile(100_p999)                       115.85%     8.11ns  123.27M
+ * estimateQuantile(100x1_p001)                                 7.34ns  136.21M
+ * estimateQuantile(100_p01)                         68.10%    10.78ns   92.76M
+ * estimateQuantile(100_p25)                         11.51%    63.77ns   15.68M
+ * estimateQuantile(100_p50)                          7.98%    92.03ns   10.87M
+ * estimateQuantile(100_p75)                         14.99%    48.98ns   20.42M
+ * estimateQuantile(100_p99)                         77.57%     9.46ns  105.65M
+ * estimateQuantile(100_p999)                       130.42%     5.63ns  177.64M
 * ----------------------------------------------------------------------------
- * estimateQuantile(1000_p001)                       27.57%    34.08ns   29.34M
- * estimateQuantile(1000_p01)                         8.53%   110.24ns    9.07M
- * estimateQuantile(1000_p25)                         1.92%   488.24ns    2.05M
- * estimateQuantile(1000_p50)                         1.37%   684.40ns    1.46M
- * estimateQuantile(1000_p75)                         1.94%   485.23ns    2.06M
- * estimateQuantile(1000_p99)                         8.87%   105.90ns    9.44M
- * estimateQuantile(1000_p999)                       36.64%    25.65ns   38.99M
+ * estimateQuantile(1000_p001)                       16.69%    43.99ns   22.73M
+ * estimateQuantile(1000_p01)                         6.08%   120.74ns    8.28M
+ * estimateQuantile(1000_p25)                         1.43%   513.01ns    1.95M
+ * estimateQuantile(1000_p50)                         1.06%   693.28ns    1.44M
+ * estimateQuantile(1000_p75)                         1.66%   442.20ns    2.26M
+ * estimateQuantile(1000_p99)                         7.12%   103.08ns    9.70M
+ * estimateQuantile(1000_p999)                       22.98%    31.94ns   31.30M
 * ============================================================================
 */


--- a/folly/stats/test/TDigestTest.cpp
+++ b/folly/stats/test/TDigestTest.cpp
@@ -275,6 +275,41 @@ TEST(TDigest, LargeOutlierTest) {
      (int64_t)digest.estimateQuantile(0.90));
 }

+TEST(TDigest, FloatingPointSortedTest) {
+  // When combining centroids, floating point accuracy can lead to us building
+  // an unsorted digest if we are not careful. This tests that the centroids of
+  // a digest produced by repeated merges of identical values remain sorted.
+  double val = 1.4;
+  TDigest digest1(100);
+  std::vector<double> values1;
+  for (int i = 1; i <= 100; ++i) {
+    values1.push_back(val);
+  }
+  digest1 = digest1.merge(values1);  // digest built from 100 copies of val
+
+  TDigest digest2(100);
+  std::vector<double> values2;
+  for (int i = 1; i <= 100; ++i) {
+    values2.push_back(val);
+  }
+  digest2 = digest2.merge(values2);  // second digest built from the same input
+
+  std::array<TDigest, 2> a{{digest1, digest2}};
+  auto mergeDigest1 = TDigest::merge(a);  // digest-of-digests merge path
+
+  TDigest digest3(100);
+  std::vector<double> values3;
+  for (int i = 1; i <= 100; ++i) {
+    values3.push_back(val);
+  }
+  digest3 = digest2.merge(values3);  // NOTE(review): merges into digest2, not the fresh digest3 - confirm this is intended
+  std::array<TDigest, 2> b{{digest3, mergeDigest1}};
+  auto mergeDigest2 = TDigest::merge(b);
+
+  auto centroids = mergeDigest2.getCentroids();
+  EXPECT_EQ(std::is_sorted(centroids.begin(), centroids.end()), true);  // centroids must come out in sorted order
+}
+
 class DistributionTest
    : public ::testing::TestWithParam<
          std::tuple<std::pair<bool, size_t>, double, bool>> {};