Commit 81d9192f authored by Nathan Bronson's avatar Nathan Bronson Committed by Facebook Github Bot

improvements to folly::doNotOptimizeAway

Summary:
A common failure mode for folly::doNotOptimizeAway is to pass it a
pointer to a complex object that the caller wants fully computed;
sinking the pointer does not ensure the pointee is computed.  Also, the GCC and
MSVC implementations used inline assembly with an in-out parameter,
which causes a store.  A survey of many usages of doNotOptimizeAway
found only 1 that relies on this make-unpredictable behavior.

This diff makes doNotOptimizeAway(&x) equivalent to doNotOptimizeAway(x)
for GCC and clang, and makes it a read-only sink.  For the rare
case that the benchmark wants to disable subexpression elimination,
constant propagation, or power reduction, there is a new function
makeUnpredictable.  It also merges the clang and GCC implementations,
removing a potential bias in our microbenchmarks.

Reviewed By: davidtgoldblatt

Differential Revision: D4074670

fbshipit-source-id: 43f02e7fe149147bb172babe77787dea06e098fa
parent cc453345
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <folly/Portability.h> #include <folly/Portability.h>
#include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE #include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
#include <folly/ScopeGuard.h> #include <folly/ScopeGuard.h>
#include <folly/Traits.h>
#include <folly/portability/GFlags.h> #include <folly/portability/GFlags.h>
#include <folly/portability/Time.h> #include <folly/portability/Time.h>
...@@ -235,32 +236,73 @@ addBenchmark(const char* file, const char* name, Lambda&& lambda) { ...@@ -235,32 +236,73 @@ addBenchmark(const char* file, const char* name, Lambda&& lambda) {
} }
/** /**
* Call doNotOptimizeAway(var) against variables that you use for * Call doNotOptimizeAway(var) to ensure that var will be computed even
* post-optimization. Use it for variables that are computed during
* benchmarking but otherwise are useless. The compiler tends to do a * benchmarking but otherwise are useless. The compiler tends to do a
* good job at eliminating unused variables, and this function fools * good job at eliminating unused variables, and this function fools it
* it into thinking var is in fact needed. * into thinking var is in fact needed.
*
* Call makeUnpredictable(var) when you don't want the optimizer to use
* its knowledge of var to shape the following code. This is useful
* when constant propagation or power reduction is possible during your
* benchmark but not in real use cases.
*/ */
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma optimize("", off) #pragma optimize("", off)
template <class T> inline void doNotOptimizeDependencySink(const void*) {}
void doNotOptimizeAway(T&& datum) {
datum = datum;
}
#pragma optimize("", on) #pragma optimize("", on)
#elif defined(__clang__)
template <class T> template <class T>
__attribute__((__optnone__)) void doNotOptimizeAway(T&& /* datum */) {} void doNotOptimizeAway(const T& datum) {
doNotOptimizeDependencySink(&datum);
}
template <typename T>
void makeUnpredictable(T& datum) {
doNotOptimizeDependencySink(&datum);
}
#else #else
template <class T> namespace detail {
void doNotOptimizeAway(T&& datum) { template <typename T>
asm volatile("" : "+r" (datum)); struct DoNotOptimizeAwayNeedsIndirect {
using Decayed = typename std::decay<T>::type;
// First two constraints ensure it can be an "r" operand.
// std::is_pointer check is because callers seem to expect that
// doNotOptimizeAway(&x) is equivalent to doNotOptimizeAway(x).
constexpr static bool value = !folly::IsTriviallyCopyable<Decayed>::value ||
sizeof(Decayed) > sizeof(long) || std::is_pointer<Decayed>::value;
};
} // detail namespace
// Direct sink: this overload participates only when
// detail::DoNotOptimizeAwayNeedsIndirect<T>::value is false. The asm
// statement has an empty template, no outputs and no clobbers; datum is
// its single input operand, constrained with "X".
// NOTE(review): the comment on DoNotOptimizeAwayNeedsIndirect talks about
// an "r" operand and makeUnpredictable uses "+r", whereas this uses "X" --
// confirm the looser constraint is intended.
template <typename T>
auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
!detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
asm volatile("" ::"X"(datum));
}
// Indirect sink: this overload participates only when
// detail::DoNotOptimizeAwayNeedsIndirect<T>::value is true. datum is
// passed to the empty asm statement as an "m" (memory) input operand, and
// the "memory" clobber tells the compiler the statement may touch
// arbitrary memory.
template <typename T>
auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
asm volatile("" ::"m"(datum) : "memory");
}
// Register flavor of makeUnpredictable: participates only when
// detail::DoNotOptimizeAwayNeedsIndirect<T>::value is false. The "+r"
// constraint declares datum as both read and written by the empty asm
// statement, so the compiler cannot assume datum still holds its
// previous value afterwards.
template <typename T>
auto makeUnpredictable(T& datum) -> typename std::enable_if<
!detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
asm volatile("" : "+r"(datum));
}
template <typename T>
auto makeUnpredictable(T& datum) -> typename std::enable_if<
detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
asm volatile("" ::"m"(datum) : "memory");
} }
#endif #endif
......
...@@ -46,6 +46,202 @@ BENCHMARK(gun) { ...@@ -46,6 +46,202 @@ BENCHMARK(gun) {
BENCHMARK_DRAW_LINE() BENCHMARK_DRAW_LINE()
// The running total is never handed to doNotOptimizeAway or
// makeUnpredictable, so nothing in this body obliges the compiler to
// keep either loop.
BENCHMARK(optimizerCanDiscardTrivial, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    for (long inner = 0; inner < 10000; ++inner) {
      total += inner;
    }
  }
}
// The total is sunk once per outer iteration, after the inner loop has
// finished; the inner loop itself contains no sink.
BENCHMARK(optimizerCanPowerReduceInner1Trivial, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    for (long inner = 0; inner < 10000; ++inner) {
      total += outer + inner;
    }
    doNotOptimizeAway(total);
  }
}
// The outer index is made unpredictable at the top of every outer
// iteration; the total is sunk a single time after both loops finish.
BENCHMARK(optimizerCanPowerReduceInner2Trivial, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    makeUnpredictable(outer);
    for (long inner = 0; inner < 10000; ++inner) {
      total += outer + inner;
    }
  }
  doNotOptimizeAway(total);
}
// The total is sunk inside the innermost loop, immediately after every
// single addition.
BENCHMARK(optimizerDisabled1Trivial, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    for (long inner = 0; inner < 10000; ++inner) {
      total += outer + inner;
      doNotOptimizeAway(total);
    }
  }
}
// Both loop indices are made unpredictable before they are used; the
// total is sunk a single time after both loops finish.
BENCHMARK(optimizerDisabled2Trivial, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    makeUnpredictable(outer);
    for (long inner = 0; inner < 10000; ++inner) {
      makeUnpredictable(inner);
      total += outer + inner;
    }
  }
  doNotOptimizeAway(total);
}
// Pointer variant: the address of the total (not its value) is passed
// to the sink, once per outer iteration.
BENCHMARK(optimizerCanPowerReduceInner1TrivialPtr, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    for (long inner = 0; inner < 10000; ++inner) {
      total += outer + inner;
    }
    doNotOptimizeAway(&total);
  }
}
// Pointer variant: the outer index is made unpredictable every outer
// iteration, and the address of the total is sunk once at the very end.
BENCHMARK(optimizerCanPowerReduceInner2TrivialPtr, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    makeUnpredictable(outer);
    for (long inner = 0; inner < 10000; ++inner) {
      total += outer + inner;
    }
  }
  doNotOptimizeAway(&total);
}
// Pointer variant: the address of the total is sunk inside the
// innermost loop, after every single addition.
BENCHMARK(optimizerDisabled1TrivialPtr, n) {
  long total = 0;
  for (long outer = 0; outer < n; ++outer) {
    for (long inner = 0; inner < 10000; ++inner) {
      total += outer + inner;
      doNotOptimizeAway(&total);
    }
  }
}
namespace {
/**
 * A long wrapped in a deliberately heavyweight class, used as the
 * counter and accumulator type by the *NonTrivial benchmarks.
 *
 * The virtual destructor makes the type non-trivially-copyable and the
 * extra array makes it larger than a long; presumably this is what
 * steers doNotOptimizeAway / makeUnpredictable away from their
 * register-operand overloads -- verify against the trait in Benchmark.h.
 */
class NonTrivialLong {
 public:
  explicit NonTrivialLong(long v) : value_(v) {}

  // Intentionally virtual and user-provided (see class comment).
  virtual ~NonTrivialLong() {}

  // Pre-increment; returns nothing, so it cannot be chained.
  void operator++() {
    ++value_;
  }

  void operator+=(long rhs) {
    value_ += rhs;
  }

  void operator+=(const NonTrivialLong& rhs) {
    value_ += rhs.value_;
  }

  // Read-only comparison against a plain long; const so that it can be
  // used on const objects and through const references.
  bool operator<(long rhs) const {
    return value_ < rhs;
  }

  // Returns a new object holding the sum; neither operand is modified,
  // hence const.
  NonTrivialLong operator+(const NonTrivialLong& rhs) const {
    return NonTrivialLong(value_ + rhs.value_);
  }

 private:
  long value_;
  // Never read or written; only contributes to sizeof(NonTrivialLong).
  long otherStuff_[3];
};
}
// NonTrivialLong counterpart of optimizerCanDiscardTrivial: the total
// is never passed to doNotOptimizeAway or makeUnpredictable.
BENCHMARK(optimizerCanDiscardNonTrivial, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      total += inner;
    }
  }
}
// The NonTrivialLong total is sunk once per outer iteration, after the
// inner loop has finished; the inner loop itself contains no sink.
BENCHMARK(optimizerCanPowerReduceInner1NonTrivial, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      total += outer + inner;
    }
    doNotOptimizeAway(total);
  }
}
// The outer NonTrivialLong index is made unpredictable every outer
// iteration; the total is sunk a single time after both loops finish.
BENCHMARK(optimizerCanPowerReduceInner2NonTrivial, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    makeUnpredictable(outer);
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      total += outer + inner;
    }
  }
  doNotOptimizeAway(total);
}
// The NonTrivialLong total is sunk inside the innermost loop,
// immediately after every single addition.
BENCHMARK(optimizerDisabled1NonTrivial, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      total += outer + inner;
      doNotOptimizeAway(total);
    }
  }
}
// Both NonTrivialLong indices are made unpredictable before they are
// used; the total is sunk a single time after both loops finish.
BENCHMARK(optimizerDisabled2NonTrivial, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    makeUnpredictable(outer);
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      makeUnpredictable(inner);
      total += outer + inner;
    }
  }
  doNotOptimizeAway(total);
}
// Pointer variant: the address of the NonTrivialLong total is passed to
// the sink, once per outer iteration.
BENCHMARK(optimizerCanPowerReduceInner1NonTrivialPtr, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      total += outer + inner;
    }
    doNotOptimizeAway(&total);
  }
}
// Pointer variant: the outer index is made unpredictable every outer
// iteration, and the address of the total is sunk once at the very end.
BENCHMARK(optimizerCanPowerReduceInner2NonTrivialPtr, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    makeUnpredictable(outer);
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      total += outer + inner;
    }
  }
  doNotOptimizeAway(&total);
}
// Pointer variant: the address of the NonTrivialLong total is sunk
// inside the innermost loop, after every single addition.
BENCHMARK(optimizerDisabled1NonTrivialPtr, n) {
  NonTrivialLong total(0);
  for (NonTrivialLong outer(0); outer < n; ++outer) {
    for (NonTrivialLong inner(0); inner < 10000; ++inner) {
      total += outer + inner;
      doNotOptimizeAway(&total);
    }
  }
}
BENCHMARK_DRAW_LINE()
BENCHMARK(baselinevector) { BENCHMARK(baselinevector) {
vector<int> v; vector<int> v;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment