Commit 13692fcf authored by Spencer Ahrens, committed by Tudor Bosman

add bm_min_usec/bm_max_secs and bm_regex flags

Summary: These were handy in the old framework and easy enough to add.

Test Plan:
Ran ThreadCachedIntTest; the results got better (but are still not perfect,
due to subtraction of the single-threaded baseline from multi-threaded results).

Reviewed By: sherman.ye@fb.com

FB internal diff: D490324
parent 9766ab7c
...@@ -20,7 +20,9 @@ ...@@ -20,7 +20,9 @@
#include "Foreach.h" #include "Foreach.h"
#include "json.h" #include "json.h"
#include "String.h" #include "String.h"
#include <algorithm> #include <algorithm>
#include <boost/regex.hpp>
#include <cmath> #include <cmath>
#include <iostream> #include <iostream>
#include <limits> #include <limits>
...@@ -32,6 +34,16 @@ using namespace std; ...@@ -32,6 +34,16 @@ using namespace std;
DEFINE_bool(benchmark, false, "Run benchmarks."); DEFINE_bool(benchmark, false, "Run benchmarks.");
DEFINE_bool(json, false, "Output in JSON format."); DEFINE_bool(json, false, "Output in JSON format.");
DEFINE_string(bm_regex, "",
"Only benchmarks whose names match this regex will be run.");
DEFINE_int64(bm_min_usec, 100,
"Minimum # of microseconds we'll accept for each benchmark.");
DEFINE_int32(bm_max_secs, 1,
"Maximum # of seconds we'll spend on each benchmark.");
namespace folly { namespace folly {
BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent; BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
...@@ -191,17 +203,18 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, ...@@ -191,17 +203,18 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel."; CHECK_EQ(1, ts.tv_nsec) << "Clock too coarse, upgrade your kernel.";
resolutionInNs = ts.tv_nsec; resolutionInNs = ts.tv_nsec;
} }
// Whe choose a minimum minimum (sic) of 10,000 nanoseconds, but if // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
// the clock resolution is worse than that, it will be larger. In // the clock resolution is worse than that, it will be larger. In
// essence we're aiming at making the quantization noise 0.01%. // essence we're aiming at making the quantization noise 0.01%.
static const auto minNanoseconds = min(resolutionInNs * 100000, 1000000000UL); static const auto minNanoseconds =
max(FLAGS_bm_min_usec * 1000UL, min(resolutionInNs * 100000, 1000000000UL));
// We do measurements in several epochs and take the minimum, to // We do measurements in several epochs and take the minimum, to
// account for jitter. // account for jitter.
static const unsigned int epochs = 1000; static const unsigned int epochs = 1000;
// We establish a total time budget as we don't want a measurement // We establish a total time budget as we don't want a measurement
// to take too long. This will curtail the number of actual epochs. // to take too long. This will curtail the number of actual epochs.
static const uint64_t timeBudgetInNs = 1000000000; const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000;
timespec global; timespec global;
CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global)); CHECK_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
...@@ -209,7 +222,7 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun, ...@@ -209,7 +222,7 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
size_t actualEpochs = 0; size_t actualEpochs = 0;
for (; actualEpochs < epochs; ++actualEpochs) { for (; actualEpochs < epochs; ++actualEpochs) {
for (unsigned int n = 1; n < (1U << 30); n *= 2) { for (unsigned int n = 1; n < (1UL << 30); n *= 2) {
auto const nsecs = fun(n); auto const nsecs = fun(n);
if (nsecs < minNanoseconds) { if (nsecs < minNanoseconds) {
continue; continue;
...@@ -394,15 +407,24 @@ void runBenchmarks() { ...@@ -394,15 +407,24 @@ void runBenchmarks() {
vector<tuple<const char*, const char*, double>> results; vector<tuple<const char*, const char*, double>> results;
results.reserve(benchmarks.size() - 1); results.reserve(benchmarks.size() - 1);
std::unique_ptr<boost::regex> bmRegex;
if (!FLAGS_bm_regex.empty()) {
bmRegex.reset(new boost::regex(FLAGS_bm_regex));
}
// PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS. // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
auto const globalBaseline = runBenchmarkGetNSPerIteration( auto const globalBaseline = runBenchmarkGetNSPerIteration(
get<2>(benchmarks.front()), 0); get<2>(benchmarks.front()), 0);
FOR_EACH_RANGE (i, 1, benchmarks.size()) { FOR_EACH_RANGE (i, 1, benchmarks.size()) {
auto elapsed = strcmp(get<1>(benchmarks[i]), "-") == 0 double elapsed = 0.0;
? 0.0 // skip the separators if (!strcmp(get<1>(benchmarks[i]), "-") == 0) { // skip separators
: runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]), if (bmRegex && !boost::regex_search(get<1>(benchmarks[i]), *bmRegex)) {
continue;
}
elapsed = runBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
globalBaseline); globalBaseline);
}
results.emplace_back(get<0>(benchmarks[i]), results.emplace_back(get<0>(benchmarks[i]),
get<1>(benchmarks[i]), elapsed); get<1>(benchmarks[i]), elapsed);
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
namespace folly { namespace folly {
/** /**
......
...@@ -238,7 +238,7 @@ int main(int argc, char** argv) { ...@@ -238,7 +238,7 @@ int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
google::ParseCommandLineFlags(&argc, &argv, true); google::ParseCommandLineFlags(&argc, &argv, true);
google::SetCommandLineOptionWithMode( google::SetCommandLineOptionWithMode(
"bm_max_iters", "10000000", google::SET_FLAG_IF_DEFAULT "bm_min_usec", "10000", google::SET_FLAG_IF_DEFAULT
); );
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
folly::runBenchmarks(); folly::runBenchmarks();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment