Commit 7fed85c2 authored by Dave Watson's avatar Dave Watson Committed by Facebook Github Bot

Use radix sort in TDigest

Summary:
Update the TDigest interface to accept unsorted or sorted data.
If unsorted, use a custom radix sort explicitly aimed at this use case:
doubles only, assume small data, so we don't use in-place and everything still
fits in cache (1000-10000 items).

Previously D8875766.  This version does not require newer boost versions, and puts
everything in the heap, so there should be no fiber overflows.

Reviewed By: yfeldblum

Differential Revision: D9197722

fbshipit-source-id: 10088cf14d1ff88e1072c00084e09129271e97ec
parent 8f45e92c
......@@ -15,6 +15,7 @@
*/
#include <folly/stats/TDigest.h>
#include <folly/stats/detail/DoubleRadixSort.h>
#include <algorithm>
#include <limits>
......@@ -101,7 +102,28 @@ TDigest::TDigest(
}
}
TDigest TDigest::merge(Range<const double*> sortedValues) const {
// Merge unsorted values by first sorting them. Use radix sort if
// possible. This implementation puts all additional memory in the
// heap, so that if called from fiber context we do not smash the
// stack. Otherwise it is very similar to boost::spreadsort.
TDigest TDigest::merge(Range<const double*> unsortedValues) const {
auto n = unsortedValues.size();
// We require 256 buckets per byte level, plus one count array we can reuse.
std::unique_ptr<uint64_t[]> buckets{new uint64_t[256 * 9]};
// Allocate input and tmp array
std::unique_ptr<double[]> tmp{new double[n * 2]};
auto out = tmp.get() + n;
auto in = tmp.get();
std::copy(unsortedValues.begin(), unsortedValues.end(), in);
detail::double_radix_sort(n, buckets.get(), in, out);
DCHECK(std::is_sorted(in, in + n));
return merge(presorted, Range<const double*>(in, in + n));
}
TDigest TDigest::merge(presorted_t, Range<const double*> sortedValues) const {
if (sortedValues.empty()) {
return *this;
}
......
......@@ -20,6 +20,7 @@
#include <vector>
#include <folly/Range.h>
#include <folly/Utility.h>
namespace folly {
......@@ -92,7 +93,8 @@ class TDigest {
* Returns a new TDigest constructed with values merged from the current
* digest and the given sortedValues.
*/
TDigest merge(Range<const double*> sortedValues) const;
TDigest merge(presorted_t, Range<const double*> sortedValues) const;
TDigest merge(Range<const double*> unsortedValues) const;
/*
* Returns a new TDigest constructed with values merged from the given
......
......@@ -64,7 +64,6 @@ DigestT DigestBuilder<DigestT>::build() {
for (const auto& vec : valuesVec) {
values.insert(values.end(), vec.begin(), vec.end());
}
std::sort(values.begin(), values.end());
DigestT digest(digestSize_);
digests.push_back(digest.merge(values));
}
......@@ -83,7 +82,6 @@ void DigestBuilder<DigestT>::append(double value) {
}
cpuLocalBuf->buffer.push_back(value);
if (cpuLocalBuf->buffer.size() == bufferSize_) {
std::sort(cpuLocalBuf->buffer.begin(), cpuLocalBuf->buffer.end());
if (!cpuLocalBuf->digest) {
cpuLocalBuf->digest = std::make_unique<DigestT>(digestSize_);
}
......
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/stats/detail/DoubleRadixSort.h>
#include <algorithm>
#include <cstring>
namespace folly {
namespace detail {
// Convert floats to something comparable via radix sort.
// See http://stereopsis.com/radix.html for details.
static uint8_t getRadixBucket(double* f, uint8_t shift) {
uint64_t val;
memcpy(&val, f, sizeof(double));
uint64_t mask = -int64_t(val >> 63) | 0x8000000000000000;
auto adjusted = val ^ mask;
return (adjusted >> (64 - 8 - shift)) & 0xFF;
}
// MSB radix sort for doubles.
static void double_radix_sort_rec(
uint64_t n,
uint64_t* buckets,
uint8_t shift,
bool inout,
double* in,
double* out) {
// First pass: calculate bucket counts.
memset(buckets, 0, 256 * sizeof(uint64_t));
for (uint64_t i = 0; i < n; i++) {
buckets[getRadixBucket(&in[i], shift)]++;
}
// Second pass: calculate bucket start positions.
uint64_t tot = 0;
for (uint64_t i = 0; i < 256; i++) {
auto prev = tot;
tot += buckets[i];
buckets[i + 256] = prev;
}
// Third pass: Move based on radix counts.
for (uint64_t i = 0; i < n; i++) {
auto pos = buckets[getRadixBucket(&in[i], shift) + 256]++;
out[pos] = in[i];
}
// If we haven't used up all input bytes, recurse and sort. if the
// bucket is too small, use std::sort instead, and copy output to
// correct array.
if (shift < 56) {
tot = 0;
for (int i = 0; i < 256; i++) {
if (buckets[i] < 256) {
std::sort(out + tot, out + tot + buckets[i]);
if (!inout) {
memcpy(in + tot, out + tot, buckets[i] * sizeof(double));
}
} else {
double_radix_sort_rec(
buckets[i], buckets + 256, shift + 8, !inout, out + tot, in + tot);
}
tot += buckets[i];
}
}
}
void double_radix_sort(uint64_t n, uint64_t* buckets, double* in, double* tmp) {
// If array is too small, use std::sort directly.
if (n < 700) {
std::sort(in, in + n);
} else {
detail::double_radix_sort_rec(n, buckets, 0, false, in, tmp);
}
}
} // namespace detail
} // namespace folly
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstdint>
namespace folly {
namespace detail {
/*
* Sorts a double[] array using radix sort (falling back to std::sort
* for small arrays).
*
* n - size of array
* buckets - must be array of uint64_t of size 256*9.
* in & out - must be double arrays of size n. in contains input data.
*
* output - in array is sorted.
*/
void double_radix_sort(uint64_t n, uint64_t* buckets, double* in, double* tmp);
} // namespace detail
} // namespace folly
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/portability/GTest.h>
#include <folly/Random.h>
#include <folly/stats/detail/DoubleRadixSort.h>
using namespace folly;
using namespace folly::detail;
TEST(DoubleRadixSort, Basic) {
std::unique_ptr<uint64_t[]> buckets(new uint64_t[256 * 9]);
for (int i = 0; i < 100; i++) {
size_t sz = folly::Random::rand32(0, 100000);
std::unique_ptr<double[]> in(new double[sz]);
std::unique_ptr<double[]> out(new double[sz]);
for (size_t j = 0; j < sz; j++) {
in[j] = folly::Random::randDouble(-100.0, 100.0);
}
double_radix_sort(sz, buckets.get(), in.get(), out.get());
EXPECT_TRUE(std::is_sorted(in.get(), in.get() + sz));
}
}
......@@ -93,19 +93,19 @@ BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x32, 10000, 32)
* ============================================================================
* folly/stats/test/DigestBuilderBenchmark.cpp relative time/iter iters/s
* ============================================================================
* append(1000x1) 25.70ns 38.91M
* append(1000x2) 99.12% 25.93ns 38.57M
* append(1000x4) 98.62% 26.06ns 38.37M
* append(1000x8) 96.23% 26.70ns 37.45M
* append(1000x16) 88.75% 28.96ns 34.53M
* append(1000x32) 66.81% 38.46ns 26.00M
* append(1000x1) 16.32ns 61.26M
* append(1000x2) 96.42% 16.93ns 59.07M
* append(1000x4) 93.57% 17.44ns 57.33M
* append(1000x8) 93.43% 17.47ns 57.24M
* append(1000x16) 92.96% 17.56ns 56.95M
* append(1000x32) 29.23% 55.84ns 17.91M
* ----------------------------------------------------------------------------
* append(10000x1) 25.39ns 39.38M
* append(10000x2) 98.81% 25.70ns 38.91M
* append(10000x4) 98.76% 25.71ns 38.90M
* append(10000x8) 98.95% 25.66ns 38.97M
* append(10000x16) 72.90% 34.83ns 28.71M
* append(10000x32) 85.15% 29.82ns 33.54M
* append(10000x1) 11.58ns 86.33M
* append(10000x2) 95.82% 12.09ns 82.72M
* append(10000x4) 89.00% 13.01ns 76.84M
* append(10000x8) 88.63% 13.07ns 76.51M
* append(10000x16) 88.22% 13.13ns 76.16M
* append(10000x32) 24.89% 46.54ns 21.49M
* ============================================================================
*/
......
......@@ -46,7 +46,7 @@ void merge(unsigned int iters, size_t maxSize, size_t bufSize) {
}
for (const auto& buffer : buffers) {
digest = digest.merge(buffer);
digest = digest.merge(folly::presorted, buffer);
}
}
......@@ -64,7 +64,6 @@ void mergeDigests(unsigned int iters, size_t maxSize, size_t nDigests) {
for (size_t j = 0; j < maxSize; ++j) {
buffer.push_back(distribution(generator));
}
std::sort(buffer.begin(), buffer.end());
digests.push_back(digest.merge(buffer));
}
}
......@@ -95,7 +94,6 @@ void estimateQuantile(unsigned int iters, size_t maxSize, double quantile) {
for (size_t j = 0; j < bufSize; ++j) {
buffer.push_back(values[i * bufSize + j]);
}
std::sort(buffer.begin(), buffer.end());
digest = digest.merge(buffer);
}
}
......
......@@ -136,10 +136,9 @@ TEST(TDigest, MergeLargeAsDigests) {
// Ensure that the values do not monotonically increase across digests.
std::random_shuffle(values.begin(), values.end());
for (int i = 0; i < 10; ++i) {
std::vector<double> sorted(
std::vector<double> unsorted_values(
values.begin() + (i * 100), values.begin() + (i + 1) * 100);
std::sort(sorted.begin(), sorted.end());
digests.push_back(digest.merge(sorted));
digests.push_back(digest.merge(unsorted_values));
}
digest = TDigest::merge(digests);
......@@ -166,8 +165,6 @@ TEST(TDigest, NegativeValues) {
values.push_back(-i);
}
std::sort(values.begin(), values.end());
digest = digest.merge(values);
EXPECT_EQ(200, digest.count());
......@@ -195,9 +192,6 @@ TEST(TDigest, NegativeValuesMergeDigests) {
negativeValues.push_back(-i);
}
std::sort(values.begin(), values.end());
std::sort(negativeValues.begin(), negativeValues.end());
auto digest1 = digest.merge(values);
auto digest2 = digest.merge(negativeValues);
......@@ -365,9 +359,6 @@ TEST_P(DistributionTest, ReasonableError) {
std::vector<TDigest> digests;
for (size_t i = 0; i < kNumSamples / 1000; ++i) {
auto it_l = values.begin() + (i * 1000);
auto it_r = it_l + 1000;
std::sort(it_l, it_r);
folly::Range<const double*> r(values, i * 1000, 1000);
if (digestMerge) {
digests.push_back(digest.merge(r));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment