Commit 5ab8094f authored by Marc Celani, committed by Facebook Github Bot

Improve TDigest merge performance, respect compression factor properly

Summary:
I suspect the merge algorithm in the t-digest paper is slightly off. Instead of placing centroid boundaries at k = 1, 2, 3, ..., d, it places each new boundary at k_last_elem + 1, i.e. one unit past the k value reached by the last emitted centroid. This results in two issues:

1) It is possible to have more than d elements in the digest. With this change, that is no longer possible, and we can reserve exactly the right number of elements.
2) More floating point operations are performed than necessary.

Reviewed By: anakryiko

Differential Revision: D7654147

fbshipit-source-id: 131184d456353a9d936c4ed385e2b5e75d468676
parent 1b41d8dd
......@@ -47,12 +47,16 @@ namespace detail {
* Note that FMA has been tested here, but benchmarks have not shown it to be a
* performance improvement.
*/
double q_to_k(double q, double d) {
  // Map a quantile q to a scale index k for compression factor d.
  // The two branches mirror each other around q = 0.5: the lower half grows
  // as d * sqrt(q / 2), the upper half as d - d * sqrt((1 - q) / 2), so that
  // q = 0 yields 0, q = 0.5 yields d / 2 and q = 1 yields d.
  const bool in_upper_half = q >= 0.5;
  return in_upper_half ? d - d * std::sqrt(0.5 - 0.5 * q)
                       : d * std::sqrt(0.5 * q);
}
/*
* q_to_k is unused but left here as a comment for completeness.
* double q_to_k(double q, double d) {
* if (q >= 0.5) {
* return d - d * std::sqrt(0.5 - 0.5 * q);
* }
* return d * std::sqrt(0.5 * q);
* }
*/
double k_to_q(double k, double d) {
double k_div_d = k / d;
......@@ -76,10 +80,12 @@ TDigest TDigest::merge(Range<const double*> sortedValues) const {
result.count_ = count_ + sortedValues.size();
std::vector<Centroid> compressed;
compressed.reserve(2 * maxSize_);
compressed.reserve(maxSize_);
double q_0_times_count = 0.0;
double q_limit_times_count = detail::k_to_q(1, maxSize_) * result.count_;
double k_limit = 1;
double q_limit_times_count =
detail::k_to_q(k_limit++, maxSize_) * result.count_;
auto it_centroids = centroids_.begin();
auto it_sortedValues = sortedValues.begin();
......@@ -115,10 +121,7 @@ TDigest TDigest::merge(Range<const double*> sortedValues) const {
} else {
compressed.push_back(cur);
q_0_times_count += cur.weight();
double q_to_k_res =
detail::q_to_k(q_0_times_count / result.count_, maxSize_);
q_limit_times_count =
detail::k_to_q(q_to_k_res + 1, maxSize_) * result.count_;
q_limit_times_count = detail::k_to_q(k_limit++, maxSize_) * result.count_;
cur = next;
}
}
......@@ -154,11 +157,12 @@ TDigest TDigest::merge(Range<const TDigest*> digests) {
size_t maxSize = digests.begin()->maxSize_;
std::vector<Centroid> compressed;
compressed.reserve(2 * maxSize);
compressed.reserve(maxSize);
double q_0_times_count = 0.0;
double q_limit_times_count = detail::k_to_q(1, maxSize) * count;
double k_limit = 1;
double q_limit_times_count = detail::k_to_q(k_limit, maxSize) * count;
Centroid cur = centroids.front();
for (auto it = centroids.begin() + 1; it != centroids.end(); ++it) {
......@@ -169,8 +173,7 @@ TDigest TDigest::merge(Range<const TDigest*> digests) {
} else {
compressed.push_back(cur);
q_0_times_count += cur.weight();
double q_to_k_res = detail::q_to_k(q_0_times_count / count, maxSize);
q_limit_times_count = detail::k_to_q(q_to_k_res + 1, maxSize) * count;
q_limit_times_count = detail::k_to_q(k_limit++, maxSize) * count;
cur = *it;
}
}
......
......@@ -136,31 +136,31 @@ BENCHMARK_RELATIVE_NAMED_PARAM(estimateQuantile, 1000_p999, 1000, 0.999)
* ============================================================================
* folly/stats/test/TDigestBenchmark.cpp relative time/iter iters/s
* ============================================================================
* merge(100x1) 4.11us 243.60K
* merge(100x5) 52.31% 7.85us 127.43K
* merge(100x10) 30.13% 13.63us 73.39K
* merge(1000x1) 12.26% 33.49us 29.86K
* merge(1000x5) 6.23% 65.94us 15.17K
* merge(1000x10) 3.64% 112.76us 8.87K
* merge(100x1) 2.34us 427.35K
* merge(100x5) 37.44% 6.25us 159.99K
* merge(100x10) 19.38% 12.08us 82.80K
* merge(1000x1) 10.93% 21.41us 46.70K
* merge(1000x5) 4.57% 51.18us 19.54K
* merge(1000x10) 2.33% 100.28us 9.97K
* ----------------------------------------------------------------------------
* mergeDigests(100x60) 381.44us 2.62K
* mergeDigests(1000x60) 9.22% 4.14ms 241.73
* mergeDigests(100x60) 331.30us 3.02K
* mergeDigests(1000x60) 9.20% 3.60ms 277.74
* ----------------------------------------------------------------------------
* estimateQuantile(100x1_p001) 8.48ns 117.96M
* estimateQuantile(100_p01) 57.14% 14.84ns 67.41M
* estimateQuantile(100_p25) 11.95% 70.96ns 14.09M
* estimateQuantile(100_p50) 9.19% 92.27ns 10.84M
* estimateQuantile(100_p75) 12.03% 70.49ns 14.19M
* estimateQuantile(100_p99) 65.68% 12.91ns 77.47M
* estimateQuantile(100_p999) 97.06% 8.73ns 114.49M
* estimateQuantile(100x1_p001) 8.50ns 117.65M
* estimateQuantile(100_p01) 62.84% 13.53ns 73.93M
* estimateQuantile(100_p25) 13.56% 62.69ns 15.95M
* estimateQuantile(100_p50) 10.42% 81.59ns 12.26M
* estimateQuantile(100_p75) 14.85% 57.24ns 17.47M
* estimateQuantile(100_p99) 76.32% 11.14ns 89.79M
* estimateQuantile(100_p999) 127.14% 6.69ns 149.58M
* ----------------------------------------------------------------------------
* estimateQuantile(1000_p001) 23.30% 36.38ns 27.49M
* estimateQuantile(1000_p01) 6.46% 131.30ns 7.62M
* estimateQuantile(1000_p25) 1.51% 560.08ns 1.79M
* estimateQuantile(1000_p50) 1.08% 781.97ns 1.28M
* estimateQuantile(1000_p75) 1.49% 567.33ns 1.76M
* estimateQuantile(1000_p99) 6.42% 131.99ns 7.58M
* estimateQuantile(1000_p999) 27.09% 31.30ns 31.95M
* estimateQuantile(1000_p001) 26.35% 32.26ns 31.00M
* estimateQuantile(1000_p01) 7.75% 109.66ns 9.12M
* estimateQuantile(1000_p25) 1.74% 487.64ns 2.05M
* estimateQuantile(1000_p50) 1.24% 683.61ns 1.46M
* estimateQuantile(1000_p75) 1.75% 484.43ns 2.06M
* estimateQuantile(1000_p99) 7.87% 107.94ns 9.26M
* estimateQuantile(1000_p999) 34.58% 24.58ns 40.69M
* ============================================================================
*/
......
......@@ -47,7 +47,7 @@ TEST(TDigest, Basic) {
EXPECT_EQ(0.6, digest.estimateQuantile(0.001));
EXPECT_EQ(2.0 - 0.5, digest.estimateQuantile(0.01));
EXPECT_EQ(51.0 - 0.5, digest.estimateQuantile(0.5));
EXPECT_EQ(50.375, digest.estimateQuantile(0.5));
EXPECT_EQ(100.0 - 0.5, digest.estimateQuantile(0.99));
EXPECT_EQ(100.4, digest.estimateQuantile(0.999));
}
......@@ -75,7 +75,7 @@ TEST(TDigest, Merge) {
EXPECT_EQ(0.7, digest.estimateQuantile(0.001));
EXPECT_EQ(4.0 - 1.5, digest.estimateQuantile(0.01));
EXPECT_EQ(102.0 - 1.5, digest.estimateQuantile(0.5));
EXPECT_EQ(100.25, digest.estimateQuantile(0.5));
EXPECT_EQ(200.0 - 1.5, digest.estimateQuantile(0.99));
EXPECT_EQ(200.3, digest.estimateQuantile(0.999));
}
......@@ -115,8 +115,8 @@ TEST(TDigest, MergeLarge) {
EXPECT_EQ(1.5, digest.estimateQuantile(0.001));
EXPECT_EQ(10.5, digest.estimateQuantile(0.01));
EXPECT_EQ(500.5, digest.estimateQuantile(0.5));
EXPECT_EQ(990.5, digest.estimateQuantile(0.99));
EXPECT_EQ(500.25, digest.estimateQuantile(0.5));
EXPECT_EQ(990.25, digest.estimateQuantile(0.99));
EXPECT_EQ(999.5, digest.estimateQuantile(0.999));
}
......@@ -140,8 +140,8 @@ TEST(TDigest, MergeLargeAsDigests) {
EXPECT_EQ(1.5, digest.estimateQuantile(0.001));
EXPECT_EQ(10.5, digest.estimateQuantile(0.01));
EXPECT_EQ(500.5, digest.estimateQuantile(0.5));
EXPECT_EQ(990.5, digest.estimateQuantile(0.99));
EXPECT_EQ(500.25, digest.estimateQuantile(0.5));
EXPECT_EQ(990.25, digest.estimateQuantile(0.99));
EXPECT_EQ(999.5, digest.estimateQuantile(0.999));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment