Split benchmarks and tests

Summary: Benchmarks need their own main function, which means they aren't really useful in cases where we can't use a custom main function. Reviewed By: yfeldblum Differential Revision: D2962104 fb-gh-sync-id: 25bdc6e5a8bdf8c3aa94d393207a74797b2e1234 shipit-source-id: 25bdc6e5a8bdf8c3aa94d393207a74797b2e1234

Split benchmarks and tests
Summary: Benchmarks need their own main function, which means they aren't really useful in cases where we can't use a custom main function. Reviewed By: yfeldblum Differential Revision: D2962104 fb-gh-sync-id: 25bdc6e5a8bdf8c3aa94d393207a74797b2e1234 shipit-source-id: 25bdc6e5a8bdf8c3aa94d393207a74797b2e1234
d1d243a6 · Michael Lee · facebook-github-bot-0 · efe4f93c · d1d243a6 · d1d243a6
Commit d1d243a6 authored Feb 23, 2016 by Michael Lee Committed by facebook-github-bot-0 Feb 23, 2016
7 changed files
--- a/folly/test/BatonBenchmark.cpp
+++ b/folly/test/BatonBenchmark.cpp
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <folly/Baton.h>
+#include <folly/Benchmark.h>
+#include <folly/test/BatonTestHelpers.h>
+#include <folly/test/DeterministicSchedule.h>
+#include <thread>
+#include <semaphore.h>
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+using namespace folly;
+using namespace folly::test;
+using folly::detail::EmulatedFutexAtomic;
+typedef DeterministicSchedule DSched;
+// Two-thread ping-pong handoff with Baton instantiated over std::atomic;
+// `iters` is forwarded as the number of rounds.
+BENCHMARK(baton_pingpong, iters) {
+  run_pingpong_test<std::atomic>(iters);
+}
+// Same ping-pong handoff as baton_pingpong, but with Baton instantiated
+// over EmulatedFutexAtomic instead of std::atomic.
+BENCHMARK(baton_pingpong_emulated_futex, iters) {
+  const auto rounds = iters;
+  run_pingpong_test<EmulatedFutexAtomic>(rounds);
+}
+// Baseline for the Baton benchmarks: the same two-thread ping-pong handoff
+// implemented with unnamed POSIX semaphores.  In each of the `iters` rounds
+// this thread posts `a` and waits on `b`, while the helper thread waits on
+// `a` and posts `b`.
+BENCHMARK(posix_sem_pingpong, iters) {
+  sem_t sems[3];
+  sem_t* a = sems + 0;
+  sem_t* b = sems + 2; // to get it on a different cache line
+  // pshared == 0 (thread-shared only); both semaphores start at value 0.
+  sem_init(a, 0, 0);
+  sem_init(b, 0, 0);
+  // The lambda copies the pointers and `iters`; `sems` stays alive because
+  // the helper is joined before this scope exits.
+  auto thr = std::thread([=] {
+    for (size_t i = 0; i < iters; ++i) {
+      sem_wait(a);
+      sem_post(b);
+    }
+  });
+  for (size_t i = 0; i < iters; ++i) {
+    sem_post(a);
+    sem_wait(b);
+  }
+  thr.join();
+  // Every sem_init() should be paired with sem_destroy() so that any
+  // resources the implementation allocated are released.  It is safe here
+  // because the helper has been joined and nothing can still be blocked.
+  sem_destroy(a);
+  sem_destroy(b);
+}
+// I am omitting a benchmark result snapshot because these microbenchmarks
+// mainly illustrate that PreBlockAttempts is very effective for rapid
+// handoffs.  The performance of Baton and sem_t is essentially identical
+// to the required futex calls for the blocking case
+// Entry point: initializes gtest, parses gflags, runs any registered tests
+// and, only if they pass and FLAGS_benchmark is set, runs the benchmarks.
+// The process exit code is the RUN_ALL_TESTS() result.
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  const auto testStatus = RUN_ALL_TESTS();
+  const bool testsPassed = !testStatus;
+  if (testsPassed && FLAGS_benchmark) {
+    folly::runBenchmarks();
+  }
+  return testStatus;
+}
--- a/folly/test/BatonTest.cpp
+++ b/folly/test/BatonTest.cpp
@@ -15,12 +15,11 @@
 */
 #include <folly/Baton.h>
+#include <folly/test/BatonTestHelpers.h>
 #include <folly/test/DeterministicSchedule.h>
 #include <thread>
 #include <semaphore.h>
-#include <gflags/gflags.h>
 #include <gtest/gtest.h>
-#include <folly/Benchmark.h>
 using namespace folly;
 using namespace folly::test;
@@ -34,111 +33,12 @@ TEST(Baton, basic) {
  b.wait();
 }
-template <template<typename> class Atom>
-void run_pingpong_test(int numRounds) {
-  Baton<Atom> batons[17];
-  Baton<Atom>& a = batons[0];
-  Baton<Atom>& b = batons[16]; // to get it on a different cache line
-  auto thr = DSched::thread([&]{
-    for (int i = 0; i < numRounds; ++i) {
-      a.wait();
-      a.reset();
-      b.post();
-    }
-  });
-  for (int i = 0; i < numRounds; ++i) {
-    a.post();
-    b.wait();
-    b.reset();
-  }
-  DSched::join(thr);
-}
 TEST(Baton, pingpong) {
  DSched sched(DSched::uniform(0));
  run_pingpong_test<DeterministicAtomic>(1000);
 }
-BENCHMARK(baton_pingpong, iters) {
-  run_pingpong_test<std::atomic>(iters);
-}
-BENCHMARK(baton_pingpong_emulated_futex, iters) {
-  run_pingpong_test<EmulatedFutexAtomic>(iters);
-}
-BENCHMARK(posix_sem_pingpong, iters) {
-  sem_t sems[3];
-  sem_t* a = sems + 0;
-  sem_t* b = sems + 2; // to get it on a different cache line
-  sem_init(a, 0, 0);
-  sem_init(b, 0, 0);
-  auto thr = std::thread([=]{
-    for (size_t i = 0; i < iters; ++i) {
-      sem_wait(a);
-      sem_post(b);
-    }
-  });
-  for (size_t i = 0; i < iters; ++i) {
-    sem_post(a);
-    sem_wait(b);
-  }
-  thr.join();
-}
-template <template<typename> class Atom, typename Clock>
-void run_basic_timed_wait_tests() {
-  Baton<Atom> b;
-  b.post();
-  // tests if early delivery works fine
-  EXPECT_TRUE(b.timed_wait(Clock::now()));
-}
-template <template<typename> class Atom, typename Clock>
-void run_timed_wait_tmo_tests() {
-  Baton<Atom> b;
-  auto thr = DSched::thread([&]{
-    bool rv = b.timed_wait(Clock::now() + std::chrono::milliseconds(1));
-    // main thread is guaranteed to not post until timeout occurs
-    EXPECT_FALSE(rv);
-  });
-  DSched::join(thr);
-}
-template <template<typename> class Atom, typename Clock>
-void run_timed_wait_regular_test() {
-  Baton<Atom> b;
-  auto thr = DSched::thread([&] {
-    // To wait forever we'd like to use time_point<Clock>::max, but
-    // std::condition_variable does math to convert the timeout to
-    // system_clock without handling overflow.
-    auto farFuture = Clock::now() + std::chrono::hours(1000);
-    bool rv = b.timed_wait(farFuture);
-    if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
-      // DeterministicAtomic ignores actual times, so doesn't guarantee
-      // a lack of timeout
-      EXPECT_TRUE(rv);
-    }
-  });
-  if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
-    // If we are using std::atomic (or EmulatedFutexAtomic) then
-    // a sleep here guarantees to a large extent that 'thr' will
-    // execute wait before we post it, thus testing late delivery. For
-    // DeterministicAtomic, we just rely on DeterministicSchedule to do
-    // the scheduling.  The test won't fail if we lose the race, we just
-    // don't get coverage.
-    std::this_thread::sleep_for(std::chrono::milliseconds(2));
-  }
-  b.post();
-  DSched::join(thr);
-}
 TEST(Baton, timed_wait_basic_system_clock) {
  run_basic_timed_wait_tests<std::atomic, std::chrono::system_clock>();
  run_basic_timed_wait_tests<EmulatedFutexAtomic, std::chrono::system_clock>();
@@ -175,32 +75,8 @@ TEST(Baton, timed_wait_steady_clock) {
  run_timed_wait_regular_test<DeterministicAtomic, std::chrono::steady_clock>();
 }
-template <template<typename> class Atom>
-void run_try_wait_tests() {
-  Baton<Atom> b;
-  EXPECT_FALSE(b.try_wait());
-  b.post();
-  EXPECT_TRUE(b.try_wait());
-}
 TEST(Baton, try_wait) {
  run_try_wait_tests<std::atomic>();
  run_try_wait_tests<EmulatedFutexAtomic>();
  run_try_wait_tests<DeterministicAtomic>();
 }
-// I am omitting a benchmark result snapshot because these microbenchmarks
-// mainly illustrate that PreBlockAttempts is very effective for rapid
-// handoffs.  The performance of Baton and sem_t is essentially identical
-// to the required futex calls for the blocking case
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  gflags::ParseCommandLineFlags(&argc, &argv, true);
-  auto rv = RUN_ALL_TESTS();
-  if (!rv && FLAGS_benchmark) {
-    folly::runBenchmarks();
-  }
-  return rv;
-}
--- a/folly/test/BatonTestHelpers.h
+++ b/folly/test/BatonTestHelpers.h
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include <folly/Baton.h>
+#include <folly/test/DeterministicSchedule.h>
+#include <gtest/gtest.h>
+namespace folly {
+namespace test {
+typedef DeterministicSchedule DSched;
+// Bounces control between the calling thread and one DSched-spawned helper
+// thread for `numRounds` rounds, using two Baton<Atom> instances.  Each
+// round: the caller posts `ping` and waits on `pong`; the helper waits on
+// `ping` and posts `pong`.  Both sides reset the baton they just waited on
+// so that it can be reused in the next round.
+template <template <typename> class Atom>
+void run_pingpong_test(int numRounds) {
+  Baton<Atom> slots[17];
+  Baton<Atom>& ping = slots[0];
+  Baton<Atom>& pong = slots[16]; // to get it on a different cache line
+
+  auto helper = DSched::thread([&] {
+    for (int round = 0; round < numRounds; ++round) {
+      ping.wait();
+      ping.reset();
+      pong.post();
+    }
+  });
+
+  for (int round = 0; round < numRounds; ++round) {
+    ping.post();
+    pong.wait();
+    pong.reset();
+  }
+
+  DSched::join(helper);
+}
+// Early-delivery check: posts the baton first, then calls timed_wait() with
+// a deadline of Clock::now() and expects it to return true.
+template <template <typename> class Atom, typename Clock>
+void run_basic_timed_wait_tests() {
+  Baton<Atom> b;
+  b.post();
+  // tests if early delivery works fine
+  EXPECT_TRUE(b.timed_wait(Clock::now()));
+}
+// Timeout check: a DSched thread waits on a baton that is never posted,
+// using a deadline 1ms after Clock::now(), and expects timed_wait() to
+// return false.  The calling thread only joins the waiter.
+template <template <typename> class Atom, typename Clock>
+void run_timed_wait_tmo_tests() {
+  Baton<Atom> b;
+  auto thr = DSched::thread([&] {
+    bool rv = b.timed_wait(Clock::now() + std::chrono::milliseconds(1));
+    // main thread is guaranteed to not post until timeout occurs
+    EXPECT_FALSE(rv);
+  });
+  DSched::join(thr);
+}
+// Late-delivery check: a DSched thread waits on the baton with a deadline
+// 1000 hours after Clock::now() while the calling thread posts it.  The
+// wait result is only asserted when Atom is not DeterministicAtomic.
+template <template <typename> class Atom, typename Clock>
+void run_timed_wait_regular_test() {
+  Baton<Atom> b;
+  auto thr = DSched::thread([&] {
+    // To wait forever we'd like to use time_point<Clock>::max, but
+    // std::condition_variable does math to convert the timeout to
+    // system_clock without handling overflow.
+    auto farFuture = Clock::now() + std::chrono::hours(1000);
+    bool rv = b.timed_wait(farFuture);
+    if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
+      // DeterministicAtomic ignores actual times, so doesn't guarantee
+      // a lack of timeout
+      EXPECT_TRUE(rv);
+    }
+  });
+  if (!std::is_same<Atom<int>, DeterministicAtomic<int>>::value) {
+    // If we are using std::atomic (or EmulatedFutexAtomic) then
+    // a sleep here guarantees to a large extent that 'thr' will
+    // execute wait before we post it, thus testing late delivery. For
+    // DeterministicAtomic, we just rely on DeterministicSchedule to do
+    // the scheduling.  The test won't fail if we lose the race, we just
+    // don't get coverage.
+    std::this_thread::sleep_for(std::chrono::milliseconds(2));
+  }
+  // Post after the optional 2ms head start so the waiter can be released.
+  b.post();
+  DSched::join(thr);
+}
+// try_wait() check: expects false on a freshly constructed baton and true
+// once post() has been called on it.
+template <template <typename> class Atom>
+void run_try_wait_tests() {
+  Baton<Atom> b;
+  EXPECT_FALSE(b.try_wait());
+  b.post();
+  EXPECT_TRUE(b.try_wait());
+}
+}
+}
--- a/folly/test/ForeachBenchmark.cpp
+++ b/folly/test/ForeachBenchmark.cpp
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <folly/Foreach.h>
+#include <folly/Benchmark.h>
+#include <gtest/gtest.h>
+#include <map>
+using namespace folly;
+using namespace folly::detail;
+// Benchmarks:
+// 1. Benchmark iterating through the map with FOR_EACH, and also assign
+//    iter->first and iter->second to local vars inside the FOR_EACH loop.
+// 2. Benchmark iterating through the map with FOR_EACH, but use iter->first and
+//    iter->second as is, without assigning to local variables.
+// 3. Use FOR_EACH_KV loop to iterate through the map.
+// Fixture shared by the map-iteration benchmarks below.
+std::map<int, std::string> bmMap;
+
+// Empties bmMap, then inserts one entry per index in [0, iters), each
+// mapped to "teststring" (the size_t index is converted to the int key).
+void setupBenchmark(size_t iters) {
+  bmMap.clear();
+  size_t key = 0;
+  while (key < iters) {
+    bmMap[key] = "teststring";
+    ++key;
+  }
+}
+// Case 1: walks bmMap with FOR_EACH and copies iter->first / iter->second
+// into locals before accumulating them.
+// NOTE(review): sumKeys/sumValues are never passed to doNotOptimizeAway,
+// unlike `sum` in the ForEachManual/ForEachRange benchmarks -- confirm the
+// compiler is not eliding part of the measured loop.
+BENCHMARK(ForEachKVNoMacroAssign, iters) {
+  int sumKeys = 0;
+  std::string sumValues;
+  // Rebuild the `iters`-entry fixture; presumably excluded from the timing.
+  BENCHMARK_SUSPEND { setupBenchmark(iters); }
+  FOR_EACH(iter, bmMap) {
+    const int k = iter->first;
+    const std::string v = iter->second;
+    sumKeys += k;
+    sumValues += v;
+  }
+}
+// Case 2: walks bmMap with FOR_EACH and accumulates iter->first /
+// iter->second directly, without intermediate local copies.
+BENCHMARK(ForEachKVNoMacroNoAssign, iters) {
+  int sumKeys = 0;
+  std::string sumValues;
+  // Rebuild the `iters`-entry fixture; presumably excluded from the timing.
+  BENCHMARK_SUSPEND { setupBenchmark(iters); }
+  FOR_EACH(iter, bmMap) {
+    sumKeys += iter->first;
+    sumValues += iter->second;
+  }
+}
+// Macro-free baseline: the same accumulation as ForEachKVNoMacroNoAssign,
+// written as an explicit begin()/end() iterator loop over bmMap.
+BENCHMARK(ManualLoopNoAssign, iters) {
+  int sumKeys = 0;
+  std::string sumValues;
+  // Rebuild the `iters`-entry fixture; presumably excluded from the timing.
+  BENCHMARK_SUSPEND { setupBenchmark(iters); }
+  for (auto iter = bmMap.begin(); iter != bmMap.end(); ++iter) {
+    sumKeys += iter->first;
+    sumValues += iter->second;
+  }
+}
+// Case 3: walks bmMap with FOR_EACH_KV, which supplies the names `k` and
+// `v` used in the loop body, and accumulates them.
+BENCHMARK(ForEachKVMacro, iters) {
+  int sumKeys = 0;
+  std::string sumValues;
+  // Rebuild the `iters`-entry fixture; presumably excluded from the timing.
+  BENCHMARK_SUSPEND { setupBenchmark(iters); }
+  FOR_EACH_KV(k, v, bmMap) {
+    sumKeys += k;
+    sumValues += v;
+  }
+}
+// Hand-written ascending loop over i in [1, iters); baseline for
+// ForEachRange.  The product itself is not meaningful -- it only gives the
+// loop a result that is handed to doNotOptimizeAway.
+BENCHMARK(ForEachManual, iters) {
+  int sum = 1;
+  for (size_t i = 1; i < iters; ++i) {
+    sum *= i;
+  }
+  doNotOptimizeAway(sum);
+}
+// Same multiply loop as ForEachManual, driven by FOR_EACH_RANGE(i, 1, iters)
+// (presumably the half-open range [1, iters) -- confirm against Foreach.h).
+BENCHMARK(ForEachRange, iters) {
+  int sum = 1;
+  FOR_EACH_RANGE(i, 1, iters) { sum *= i; }
+  doNotOptimizeAway(sum);
+}
+// Hand-written descending loop; baseline for ForEachRangeR.  The body sees
+// i = iters-1 down to 1 and does not run at all when iters <= 1.
+BENCHMARK(ForEachDescendingManual, iters) {
+  int sum = 1;
+  // `i-- > 1` tests the old value and then decrements, so the unsigned
+  // index never has to go below zero to terminate the loop.
+  for (size_t i = iters; i-- > 1;) {
+    sum *= i;
+  }
+  doNotOptimizeAway(sum);
+}
+// Same multiply loop as ForEachDescendingManual, driven by
+// FOR_EACH_RANGE_R(i, 1U, iters) (presumably [1, iters) visited in reverse
+// -- confirm against Foreach.h).
+BENCHMARK(ForEachRangeR, iters) {
+  int sum = 1;
+  FOR_EACH_RANGE_R(i, 1U, iters) { sum *= i; }
+  doNotOptimizeAway(sum);
+}
+// Entry point: initializes gtest and runs any registered tests; the
+// benchmarks run only when RUN_ALL_TESTS() reports success (0).  The exit
+// code is the test status.  Note that gflags are not parsed here.
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  const auto testStatus = RUN_ALL_TESTS();
+  if (!testStatus) {
+    runBenchmarks();
+  }
+  return testStatus;
+}
--- a/folly/test/ForeachTest.cpp
+++ b/folly/test/ForeachTest.cpp
@@ -16,7 +16,6 @@
 #include <folly/Foreach.h>
-#include <folly/Benchmark.h>
 #include <gtest/gtest.h>
 #include <map>
 #include <string>
@@ -165,119 +164,3 @@ TEST(Foreach, ForEachRangeR) {
  }
  EXPECT_EQ(10, sum);
 }
-// Benchmarks:
-// 1. Benchmark iterating through the man with FOR_EACH, and also assign
-//    iter->first and iter->second to local vars inside the FOR_EACH loop.
-// 2. Benchmark iterating through the man with FOR_EACH, but use iter->first and
-//    iter->second as is, without assigning to local variables.
-// 3. Use FOR_EACH_KV loop to iterate through the map.
-std::map<int, std::string> bmMap;  // For use in benchmarks below.
-void setupBenchmark(size_t iters) {
-  bmMap.clear();
-  for (size_t i = 0; i < iters; ++i) {
-    bmMap[i] = "teststring";
-  }
-}
-BENCHMARK(ForEachKVNoMacroAssign, iters) {
-  int sumKeys = 0;
-  std::string sumValues;
-  BENCHMARK_SUSPEND {
-    setupBenchmark(iters);
-  }
-  FOR_EACH (iter, bmMap) {
-    const int k = iter->first;
-    const std::string v = iter->second;
-    sumKeys += k;
-    sumValues += v;
-  }
-}
-BENCHMARK(ForEachKVNoMacroNoAssign, iters) {
-  int sumKeys = 0;
-  std::string sumValues;
-  BENCHMARK_SUSPEND {
-    setupBenchmark(iters);
-  }
-  FOR_EACH (iter, bmMap) {
-    sumKeys += iter->first;
-    sumValues += iter->second;
-  }
-}
-BENCHMARK(ManualLoopNoAssign, iters) {
-  int sumKeys = 0;
-  std::string sumValues;
-  BENCHMARK_SUSPEND {
-    setupBenchmark(iters);
-  }
-  for (auto iter = bmMap.begin(); iter != bmMap.end(); ++iter) {
-    sumKeys += iter->first;
-    sumValues += iter->second;
-  }
-}
-BENCHMARK(ForEachKVMacro, iters) {
-  int sumKeys = 0;
-  std::string sumValues;
-  BENCHMARK_SUSPEND {
-    setupBenchmark(iters);
-  }
-  FOR_EACH_KV (k, v, bmMap) {
-    sumKeys += k;
-    sumValues += v;
-  }
-}
-BENCHMARK(ForEachManual, iters) {
-  int sum = 1;
-  for (size_t i = 1; i < iters; ++i) {
-    sum *= i;
-  }
-  doNotOptimizeAway(sum);
-}
-BENCHMARK(ForEachRange, iters) {
-  int sum = 1;
-  FOR_EACH_RANGE (i, 1, iters) {
-    sum *= i;
-  }
-  doNotOptimizeAway(sum);
-}
-BENCHMARK(ForEachDescendingManual, iters) {
-  int sum = 1;
-  for (size_t i = iters; i-- > 1; ) {
-    sum *= i;
-  }
-  doNotOptimizeAway(sum);
-}
-BENCHMARK(ForEachRangeR, iters) {
-  int sum = 1;
-  FOR_EACH_RANGE_R (i, 1U, iters) {
-    sum *= i;
-  }
-  doNotOptimizeAway(sum);
-}
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  auto r = RUN_ALL_TESTS();
-  if (r) {
-    return r;
-  }
-  runBenchmarks();
-  return 0;
-}
--- a/folly/test/Makefile.am
+++ b/folly/test/Makefile.am
@@ -67,7 +67,11 @@ sorted_vector_types_test_LDADD = libfollytestmain.la
 foreach_test_SOURCES = ForeachTest.cpp
-foreach_test_LDADD = libfollytestmain.la $(top_builddir)/libfollybenchmark.la
+foreach_test_LDADD = libfollytestmain.la
+foreach_benchmark_SOURCES = ForeachBenchmark.cpp
+foreach_benchmark_LDADD = libfollytestmain.la $(top_builddir)/libfollybenchmark.la
+check_PROGRAMS += foreach_benchmark
 hash_test_SOURCES = HashTest.cpp
 hash_test_LDADD = libfollytestmain.la

--- a/folly/test/RWSpinLockTest.cpp
+++ b/folly/test/RWSpinLockTest.cpp
@@ -24,11 +24,15 @@
 #include <thread>
 #include <gtest/gtest.h>
-#include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <folly/RWSpinLock.h>
+#if FOLLY_HAVE_LIBGFLAGS
+#include <gflags/gflags.h>
 DEFINE_int32(num_threads, 8, "num threads");
+#else
+constexpr int FLAGS_num_threads = 8;
+#endif
 namespace {
@@ -234,9 +238,3 @@ TEST(RWSpinLock, concurrent_holder_test) {
 }
 }
-int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
-  gflags::ParseCommandLineFlags(&argc, &argv, true);
-  return RUN_ALL_TESTS();
-}