Commit 44cad810 authored by Dave Watson, committed by Facebook Github Bot

Switch to radix sort for DigestBuilder

Summary: Radix sort (as implemented by boost's spreadsort) is 2x faster than std::sort.

Reviewed By: nbronson

Differential Revision: D8875766

fbshipit-source-id: d88a323e6c14d58f0820c2d6d8c3d578c1305482
parent b0eb4087
......@@ -18,6 +18,7 @@
#include <folly/stats/detail/DigestBuilder.h>
+#include <boost/sort/spreadsort/spreadsort.hpp>
#include <algorithm>
#include <folly/concurrency/CacheLocality.h>
......@@ -64,7 +65,7 @@ DigestT DigestBuilder<DigestT>::build() {
for (const auto& vec : valuesVec) {
values.insert(values.end(), vec.begin(), vec.end());
}
-std::sort(values.begin(), values.end());
+boost::sort::spreadsort::spreadsort(values.begin(), values.end());
DigestT digest(digestSize_);
digests.push_back(digest.merge(values));
}
......@@ -83,7 +84,8 @@ void DigestBuilder<DigestT>::append(double value) {
}
cpuLocalBuf->buffer.push_back(value);
if (cpuLocalBuf->buffer.size() == bufferSize_) {
-std::sort(cpuLocalBuf->buffer.begin(), cpuLocalBuf->buffer.end());
+boost::sort::spreadsort::spreadsort(
+cpuLocalBuf->buffer.begin(), cpuLocalBuf->buffer.end());
if (!cpuLocalBuf->digest) {
cpuLocalBuf->digest = std::make_unique<DigestT>(digestSize_);
}
......
......@@ -93,19 +93,19 @@ BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x32, 10000, 32)
* ============================================================================
* folly/stats/test/DigestBuilderBenchmark.cpp relative time/iter iters/s
* ============================================================================
- * append(1000x1) 25.90ns 38.61M
- * append(1000x2) 99.27% 26.09ns 38.33M
- * append(1000x4) 99.82% 25.95ns 38.54M
- * append(1000x8) 98.54% 26.28ns 38.05M
- * append(1000x16) 84.07% 30.81ns 32.46M
- * append(1000x32) 82.58% 31.36ns 31.88M
+ * append(1000x1) 18.18ns 55.00M
+ * append(1000x2) 97.43% 18.66ns 53.58M
+ * append(1000x4) 93.91% 19.36ns 51.65M
+ * append(1000x8) 93.77% 19.39ns 51.57M
+ * append(1000x16) 93.78% 19.39ns 51.57M
+ * append(1000x32) 45.60% 39.87ns 25.08M
* ----------------------------------------------------------------------------
- * append(10000x1) 25.34ns 39.46M
- * append(10000x2) 99.75% 25.41ns 39.36M
- * append(10000x4) 99.24% 25.54ns 39.16M
- * append(10000x8) 106.97% 23.69ns 42.21M
- * append(10000x16) 87.82% 28.86ns 34.65M
- * append(10000x32) 72.99% 34.72ns 28.80M
+ * append(10000x1) 13.53ns 73.91M
+ * append(10000x2) 91.27% 14.82ns 67.46M
+ * append(10000x4) 90.38% 14.97ns 66.80M
+ * append(10000x8) 89.26% 15.16ns 65.97M
+ * append(10000x16) 88.97% 15.21ns 65.76M
+ * append(10000x32) 36.74% 36.83ns 27.15M
* ============================================================================
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment