Add support for returning number of benchmark iterations

Summary: I'm looping through a large number of test cases in a benchmark and I'm interested in the average time per test case rather than the total time, which is what I'm currently seeing. In order to get the average time, I need to be able to tell the benchmark module how many iterations have been run. This change adds _MULTI variants of the different BENCHMARK_ macros that allow for returning the actual number of iterations that have been run, e.g.: BENCHMARK_MULTI(benchmarkSomething) { std::vector<int> testCases { 0, 1, 1, 2, 3, 5 }; for (int c : testCases) { doSomething(c); } return testCases.size(); } Test Plan: * fbconfig -r folly && fbmake runtests * added new test cases to example benchmark code Reviewed By: simpkins@fb.com Subscribers: folly@lists, london_search@ FB internal diff: D1356437

Add support for returning number of benchmark iterations
Summary: I'm looping through a large number of test cases in a benchmark and I'm interested in the average time per test case rather than the total time, which is what I'm currently seeing. In order to get the average time, I need to be able to tell the benchmark module how many iterations have been run. This change adds _MULTI variants of the different BENCHMARK_ macros that allow for returning the actual number of iterations that have been run, e.g.: BENCHMARK_MULTI(benchmarkSomething) { std::vector<int> testCases { 0, 1, 1, 2, 3, 5 }; for (int c : testCases) { doSomething(c); } return testCases.size(); } Test Plan: * fbconfig -r folly && fbmake runtests * added new test cases to example benchmark code Reviewed By: simpkins@fb.com Subscribers: folly@lists, london_search@ FB internal diff: D1356437
aed6ef6f · Marcus Holland-Moritz · Anton Likhtarov · ec08f7d4 · aed6ef6f · aed6ef6f
Commit aed6ef6f authored Jun 06, 2014 by Marcus Holland-Moritz Committed by Anton Likhtarov Jun 09, 2014
Showing with 177 additions and 15 deletions

folly/Benchmark.cpp folly/Benchmark.cpp +5 -4

folly/Benchmark.h folly/Benchmark.h +109 -11

folly/gen/test/Bench.h folly/gen/test/Bench.h +2 -0

folly/test/BenchmarkTest.cpp folly/test/BenchmarkTest.cpp +61 -0

No files found.
--- a/folly/Benchmark.cpp
+++ b/folly/Benchmark.cpp
@@ -51,7 +51,7 @@ namespace folly {

 BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;

-typedef function<uint64_t(unsigned int)> BenchmarkFun;
+typedef function<detail::TimeIterPair(unsigned int)> BenchmarkFun;
 static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;

 // Add the global baseline
@@ -231,13 +231,14 @@ static double runBenchmarkGetNSPerIteration(const BenchmarkFun& fun,

  for (; actualEpochs < epochs; ++actualEpochs) {
    for (unsigned int n = FLAGS_bm_min_iters; n < (1UL << 30); n *= 2) {
-      auto const nsecs = fun(n);
-      if (nsecs < minNanoseconds) {
+      auto const nsecsAndIter = fun(n);
+      if (nsecsAndIter.first < minNanoseconds) {
        continue;
      }
      // We got an accurate enough timing, done. But only save if
      // smaller than the current result.
-      epochResults[actualEpochs] = max(0.0, double(nsecs) / n - globalBaseline);
+      epochResults[actualEpochs] = max(0.0, double(nsecsAndIter.first) /
+                                       nsecsAndIter.second - globalBaseline);
      // Done with the current epoch, we got a meaningful timing.
      break;
    }

--- a/folly/Benchmark.h
+++ b/folly/Benchmark.h
@@ -56,13 +56,15 @@ namespace detail {
 */
 enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };

+typedef std::pair<uint64_t, unsigned int> TimeIterPair;
+
 /**
 * Adds a benchmark wrapped in a std::function. Only used
 * internally. Pass by value is intentional.
 */
 void addBenchmarkImpl(const char* file,
                      const char* name,
-                      std::function<uint64_t(unsigned int)>);
+                      std::function<TimeIterPair(unsigned int)>);

 /**
 * Takes the difference between two timespec values. end is assumed to
@@ -185,24 +187,27 @@ typename std::enable_if<
  == 2
 >::type
 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
-  auto execute = [=](unsigned int times) -> uint64_t {
+  auto execute = [=](unsigned int times) {
    BenchmarkSuspender::nsSpent = 0;
    timespec start, end;
+    unsigned int niter;

    // CORE MEASUREMENT STARTS
    auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start);
-    lambda(times);
+    niter = lambda(times);
    auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end);
    // CORE MEASUREMENT ENDS

    CHECK_EQ(0, r1);
    CHECK_EQ(0, r2);

-    return detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent;
+    return detail::TimeIterPair(
+      detail::timespecDiff(end, start) - BenchmarkSuspender::nsSpent,
+      niter);
  };

  detail::addBenchmarkImpl(file, name,
-                           std::function<uint64_t(unsigned int)>(execute));
+    std::function<detail::TimeIterPair(unsigned int)>(execute));
 }

 /**
@@ -218,9 +223,11 @@ typename std::enable_if<
 >::type
 addBenchmark(const char* file, const char* name, Lambda&& lambda) {
  addBenchmark(file, name, [=](unsigned int times) {
+      unsigned int niter = 0;
      while (times-- > 0) {
-        lambda();
+        niter += lambda();
      }
+      return niter;
    });
 }

@@ -254,14 +261,28 @@ void doNotOptimizeAway(T&& datum) {
 * Introduces a benchmark function. Used internally, see BENCHMARK and
 * friends below.
 */
-#define BENCHMARK_IMPL(funName, stringName, paramType, paramName)       \
+#define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName)   \
  static void funName(paramType);                                       \
  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (           \
    ::folly::addBenchmark(__FILE__, stringName,                         \
-      [](paramType paramName) { funName(paramName); }),                 \
+      [](paramType paramName) -> unsigned { funName(paramName);         \
+                                            return rv; }),              \
    true);                                                              \
  static void funName(paramType paramName)

+/**
+ * Introduces a benchmark function with support for returning the actual
+ * number of iterations. Used internally, see BENCHMARK_MULTI and friends
+ * below.
+ */
+#define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName) \
+  static unsigned funName(paramType);                                   \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (           \
+    ::folly::addBenchmark(__FILE__, stringName,                         \
+      [](paramType paramName) { return funName(paramName); }),          \
+    true);                                                              \
+  static unsigned funName(paramType paramName)
+
 /**
 * Introduces a benchmark function. Use with either one one or two
 * arguments. The first is the name of the benchmark. Use something
@@ -283,6 +304,29 @@ void doNotOptimizeAway(T&& datum) {
 */
 #define BENCHMARK(name, ...)                                    \
  BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    FB_STRINGIZE(name),                                         \
+    FB_ARG_2_OR_1(1, ## __VA_ARGS__),                           \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+/**
+ * Like BENCHMARK above, but allows the user to return the actual
+ * number of iterations executed in the function body. This can be
+ * useful if the benchmark function doesn't know upfront how many
+ * iterations it's going to run or if it runs through a certain
+ * number of test cases, e.g.:
+ *
+ * BENCHMARK_MULTI(benchmarkSomething) {
+ *   std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
+ *   for (int c : testCases) {
+ *     doSomething(c);
+ *   }
+ *   return testCases.size();
+ * }
+ */
+#define BENCHMARK_MULTI(name, ...)                              \
+  BENCHMARK_MULTI_IMPL(                                         \
    name,                                                       \
    FB_STRINGIZE(name),                                         \
    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
@@ -313,6 +357,13 @@ void doNotOptimizeAway(T&& datum) {
 #define BENCHMARK_PARAM(name, param)                                    \
  BENCHMARK_NAMED_PARAM(name, param, param)

+/**
+ * Same as BENCHMARK_PARAM, but allows to return the actual number of
+ * iterations that have been run.
+ */
+#define BENCHMARK_PARAM_MULTI(name, param)                              \
+  BENCHMARK_NAMED_PARAM_MULTI(name, param, param)
+
 /*
 * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
 * parameter, rather than using the parameter value.
@@ -340,11 +391,25 @@ void doNotOptimizeAway(T&& datum) {
  BENCHMARK_IMPL(                                                       \
      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
      FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",              \
+      iters,                                                            \
      unsigned,                                                         \
      iters) {                                                          \
    name(iters, ## __VA_ARGS__);                                        \
  }

+/**
+ * Same as BENCHMARK_NAMED_PARAM, but allows to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...)              \
+  BENCHMARK_MULTI_IMPL(                                                 \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
+      FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",              \
+      unsigned,                                                         \
+      iters) {                                                          \
+    return name(iters, ## __VA_ARGS__);                                 \
+  }
+
 /**
 * Just like BENCHMARK, but prints the time relative to a
 * baseline. The baseline is the most recent BENCHMARK() seen in
@@ -371,6 +436,18 @@ void doNotOptimizeAway(T&& datum) {
 */
 #define BENCHMARK_RELATIVE(name, ...)                           \
  BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    "%" FB_STRINGIZE(name),                                     \
+    FB_ARG_2_OR_1(1, ## __VA_ARGS__),                           \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+/**
+ * Same as BENCHMARK_RELATIVE, but allows to return the actual number
+ * of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_MULTI(name, ...)                     \
+  BENCHMARK_MULTI_IMPL(                                         \
    name,                                                       \
    "%" FB_STRINGIZE(name),                                     \
    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
@@ -382,6 +459,13 @@ void doNotOptimizeAway(T&& datum) {
 #define BENCHMARK_RELATIVE_PARAM(name, param)                           \
  BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)

+/**
+ * Same as BENCHMARK_RELATIVE_PARAM, but allows to return the actual
+ * number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_PARAM_MULTI(name, param)                     \
+  BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param)
+
 /**
 * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
 */
@@ -389,17 +473,31 @@ void doNotOptimizeAway(T&& datum) {
  BENCHMARK_IMPL(                                                       \
      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",          \
+      iters,                                                            \
      unsigned,                                                         \
      iters) {                                                          \
    name(iters, ## __VA_ARGS__);                                        \
  }

+/**
+ * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows to return the
+ * actual number of iterations that have been run.
+ */
+#define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...)     \
+  BENCHMARK_MULTI_IMPL(                                                 \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
+      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",          \
+      unsigned,                                                         \
+      iters) {                                                          \
+    return name(iters, ## __VA_ARGS__);                                 \
+  }
+
 /**
 * Draws a line of dashes.
 */
-#define BENCHMARK_DRAW_LINE()                                   \
-  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (   \
-    ::folly::addBenchmark(__FILE__, "-", []() { }),             \
+#define BENCHMARK_DRAW_LINE()                                             \
+  static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = (             \
+    ::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }), \
    true);

 /**

--- a/folly/gen/test/Bench.h
+++ b/folly/gen/test/Bench.h
@@ -23,9 +23,11 @@
 static bool FB_ANONYMOUS_VARIABLE(benchGen) = (             \
  ::folly::addBenchmark(__FILE__, prefix FB_STRINGIZE(gen), \
    [](unsigned iters){                                     \
+      const unsigned num = iters;                           \
      while (iters--) {                                     \
        folly::doNotOptimizeAway(gen);                      \
      }                                                     \
+      return num;                                           \
    }), true)
 #define BENCH_GEN(gen) BENCH_GEN_IMPL(gen, "")
 #define BENCH_GEN_REL(gen) BENCH_GEN_IMPL(gen, "%")

--- a/folly/test/BenchmarkTest.cpp
+++ b/folly/test/BenchmarkTest.cpp
@@ -67,6 +67,67 @@ BENCHMARK(superslow) {
  sleep(1);
 }

+BENCHMARK_DRAW_LINE()
+
+BENCHMARK(noMulti) {
+  fun();
+}
+
+BENCHMARK_MULTI(multiSimple) {
+  FOR_EACH_RANGE (i, 0, 10) {
+    fun();
+  }
+  return 10;
+}
+
+BENCHMARK_RELATIVE_MULTI(multiSimpleRel) {
+  FOR_EACH_RANGE (i, 0, 10) {
+    fun();
+    fun();
+  }
+  return 10;
+}
+
+BENCHMARK_MULTI(multiIterArgs, iter) {
+  FOR_EACH_RANGE (i, 0, 10 * iter) {
+    fun();
+  }
+  return 10 * iter;
+}
+
+BENCHMARK_RELATIVE_MULTI(multiIterArgsRel, iter) {
+  FOR_EACH_RANGE (i, 0, 10 * iter) {
+    fun();
+    fun();
+  }
+  return 10 * iter;
+}
+
+unsigned paramMulti(unsigned iter, unsigned num) {
+  for (unsigned i = 0; i < iter; ++i) {
+    for (unsigned j = 0; j < num; ++j) {
+      fun();
+    }
+  }
+  return num * iter;
+}
+
+unsigned paramMultiRel(unsigned iter, unsigned num) {
+  for (unsigned i = 0; i < iter; ++i) {
+    for (unsigned j = 0; j < num; ++j) {
+      fun();
+      fun();
+    }
+  }
+  return num * iter;
+}
+
+BENCHMARK_PARAM_MULTI(paramMulti, 1);
+BENCHMARK_RELATIVE_PARAM_MULTI(paramMultiRel, 1);
+
+BENCHMARK_PARAM_MULTI(paramMulti, 5);
+BENCHMARK_RELATIVE_PARAM_MULTI(paramMultiRel, 5);
+
 int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  runBenchmarks();