Commit c7e6c224 authored by Tom Jackson's avatar Tom Jackson Committed by Sara Golemon

Fixing find_first_of O(n) case

Summary:
The `memchr()`-based `find_first_of()` behaves extremely badly when it's used in a loop and the input string doesn't contain all the needles. This was discovered when a reasonable line-breaking routine tried to use it to break lines by a mix of '\r' and '\n'. The entire remainder of the file was scanned each time a line was read.

Before:

```
CountDelimsBase                    1.26s   794.86m
CountDelimsNoSSE         100.03%   1.26s   795.12m
CountDelimsStd         40501.63%   3.11ms  321.93
CountDelimsMemchr         98.31%   1.28s   781.41m
CountDelimsByteSet     23162.41%   5.43ms  184.11
```

After:

```
CountDelimsBase                   3.20ms  312.08 <-- Base impl no longer considers memchr
CountDelimsNoSSE         102.37%  3.13ms  319.47
CountDelimsStd           103.19%  3.11ms  322.05
CountDelimsMemchr          0.25%  1.27s   788.39m
CountDelimsByteSet        59.68%  5.37ms  186.27
```

Test Plan: Benchmarks

Reviewed By: njormrod@fb.com

Subscribers: folly-diffs@, yfeldblum

FB internal diff: D1823536

Signature: t1:1823536:1423081687:bb2ec8cdea477ee9b9c8d8ad2bfdecdc52657515
parent f4acb308
...@@ -42,26 +42,6 @@ std::ostream& operator<<(std::ostream& os, const MutableStringPiece piece) { ...@@ -42,26 +42,6 @@ std::ostream& operator<<(std::ostream& os, const MutableStringPiece piece) {
return os; return os;
} }
namespace detail {
size_t qfind_first_byte_of_memchr(const StringPiece haystack,
const StringPiece needles) {
size_t best = haystack.size();
for (char needle: needles) {
const void* ptr = memchr(haystack.data(), needle, best);
if (ptr) {
auto found = static_cast<const char*>(ptr) - haystack.data();
best = std::min<size_t>(best, found);
}
}
if (best == haystack.size()) {
return StringPiece::npos;
}
return best;
}
} // namespace detail
namespace { namespace {
// It's okay if pages are bigger than this (as powers of two), but they should // It's okay if pages are bigger than this (as powers of two), but they should
...@@ -95,15 +75,13 @@ size_t qfind_first_byte_of_needles16(const StringPiece haystack, ...@@ -95,15 +75,13 @@ size_t qfind_first_byte_of_needles16(const StringPiece haystack,
DCHECK(!haystack.empty()); DCHECK(!haystack.empty());
DCHECK(!needles.empty()); DCHECK(!needles.empty());
DCHECK_LE(needles.size(), 16); DCHECK_LE(needles.size(), 16);
// benchmarking shows that memchr beats out SSE for small needle-sets
// with large haystacks.
if ((needles.size() <= 2 && haystack.size() >= 256) || if ((needles.size() <= 2 && haystack.size() >= 256) ||
// must bail if we can't even SSE-load a single segment of haystack // must bail if we can't even SSE-load a single segment of haystack
(haystack.size() < 16 && (haystack.size() < 16 &&
PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 15)) || PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 15)) ||
// can't load needles into SSE register if it could cross page boundary // can't load needles into SSE register if it could cross page boundary
PAGE_FOR(needles.end() - 1) != PAGE_FOR(needles.data() + 15)) { PAGE_FOR(needles.end() - 1) != PAGE_FOR(needles.data() + 15)) {
return detail::qfind_first_byte_of_memchr(haystack, needles); return detail::qfind_first_byte_of_nosse(haystack, needles);
} }
auto arr2 = __builtin_ia32_loaddqu(needles.data()); auto arr2 = __builtin_ia32_loaddqu(needles.data());
...@@ -278,16 +256,12 @@ size_t qfind_first_byte_of_nosse(const StringPiece haystack, ...@@ -278,16 +256,12 @@ size_t qfind_first_byte_of_nosse(const StringPiece haystack,
// The thresholds below were empirically determined by benchmarking. // The thresholds below were empirically determined by benchmarking.
// This is not an exact science since it depends on the CPU, the size of // This is not an exact science since it depends on the CPU, the size of
// needles, and the size of haystack. // needles, and the size of haystack.
if (haystack.size() == 1 || if ((needles.size() >= 4 && haystack.size() <= 10) ||
(haystack.size() < 4 && needles.size() <= 16)) {
return qfind_first_of(haystack, needles, asciiCaseSensitive);
} else if ((needles.size() >= 4 && haystack.size() <= 10) ||
(needles.size() >= 16 && haystack.size() <= 64) || (needles.size() >= 16 && haystack.size() <= 64) ||
needles.size() >= 32) { needles.size() >= 32) {
return qfind_first_byte_of_byteset(haystack, needles); return qfind_first_byte_of_byteset(haystack, needles);
} }
return qfind_first_of(haystack, needles, asciiCaseSensitive);
return qfind_first_byte_of_memchr(haystack, needles);
} }
} // namespace detail } // namespace detail
......
...@@ -24,9 +24,6 @@ ...@@ -24,9 +24,6 @@
namespace folly { namespace detail { namespace folly { namespace detail {
// declaration of functions in Range.cpp // declaration of functions in Range.cpp
size_t qfind_first_byte_of_memchr(const StringPiece haystack,
const StringPiece needles);
size_t qfind_first_byte_of_byteset(const StringPiece haystack, size_t qfind_first_byte_of_byteset(const StringPiece haystack,
const StringPiece needles); const StringPiece needles);
...@@ -43,6 +40,7 @@ std::string str; ...@@ -43,6 +40,7 @@ std::string str;
constexpr int kVstrSize = 16; constexpr int kVstrSize = 16;
std::vector<std::string> vstr; std::vector<std::string> vstr;
std::vector<StringPiece> vstrp; std::vector<StringPiece> vstrp;
std::string file;
void initStr(int len) { void initStr(int len) {
cout << "string length " << len << ':' << endl; cout << "string length " << len << ':' << endl;
...@@ -74,6 +72,19 @@ string ffoTestString; ...@@ -74,6 +72,19 @@ string ffoTestString;
const size_t ffoDelimSize = 128; const size_t ffoDelimSize = 128;
vector<string> ffoDelim; vector<string> ffoDelim;
void initFile(int len) {
std::uniform_int_distribution<uint32_t> validChar(1, 64);
file.clear();
while (len--) {
char ch = validChar(rnd);
if (ch == '\r') {
ch = '\n';
}
file.push_back(ch);
}
}
string generateString(int len) { string generateString(int len) {
std::uniform_int_distribution<uint32_t> validChar(1, 255); // no null-char std::uniform_int_distribution<uint32_t> validChar(1, 255); // no null-char
string ret; string ret;
...@@ -136,6 +147,20 @@ inline size_t qfind_first_byte_of_std(const StringPiece haystack, ...@@ -136,6 +147,20 @@ inline size_t qfind_first_byte_of_std(const StringPiece haystack,
return qfind_first_of(haystack, needles, asciiCaseSensitive); return qfind_first_of(haystack, needles, asciiCaseSensitive);
} }
template <class Func>
void countHits(Func func, size_t n) {
StringPiece needles = "\r\n\1";
FOR_EACH_RANGE (i, 0, n) {
size_t p, n = 0;
for (StringPiece left = file;
(p = func(left, needles)) != StringPiece::npos;
left.advance(p + 1)) {
++n;
}
doNotOptimizeAway(n);
}
}
template <class Func> template <class Func>
void findFirstOfRange(StringPiece needles, Func func, size_t n) { void findFirstOfRange(StringPiece needles, Func func, size_t n) {
FOR_EACH_RANGE (i, 0, n) { FOR_EACH_RANGE (i, 0, n) {
...@@ -146,6 +171,26 @@ void findFirstOfRange(StringPiece needles, Func func, size_t n) { ...@@ -146,6 +171,26 @@ void findFirstOfRange(StringPiece needles, Func func, size_t n) {
} }
} }
const string delims1 = "b";
BENCHMARK(FindFirstOf1NeedlesBase, n) {
findFirstOfRange(delims1, detail::qfind_first_byte_of, n);
}
BENCHMARK_RELATIVE(FindFirstOf1NeedlesNoSSE, n) {
findFirstOfRange(delims1, detail::qfind_first_byte_of_nosse, n);
}
BENCHMARK_RELATIVE(FindFirstOf1NeedlesStd, n) {
findFirstOfRange(delims1, qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(FindFirstOf1NeedlesByteSet, n) {
findFirstOfRange(delims1, detail::qfind_first_byte_of_byteset, n);
}
BENCHMARK_DRAW_LINE();
const string delims2 = "bc"; const string delims2 = "bc";
BENCHMARK(FindFirstOf2NeedlesBase, n) { BENCHMARK(FindFirstOf2NeedlesBase, n) {
...@@ -160,10 +205,6 @@ BENCHMARK_RELATIVE(FindFirstOf2NeedlesStd, n) { ...@@ -160,10 +205,6 @@ BENCHMARK_RELATIVE(FindFirstOf2NeedlesStd, n) {
findFirstOfRange(delims2, qfind_first_byte_of_std, n); findFirstOfRange(delims2, qfind_first_byte_of_std, n);
} }
BENCHMARK_RELATIVE(FindFirstOf2NeedlesMemchr, n) {
findFirstOfRange(delims2, detail::qfind_first_byte_of_memchr, n);
}
BENCHMARK_RELATIVE(FindFirstOf2NeedlesByteSet, n) { BENCHMARK_RELATIVE(FindFirstOf2NeedlesByteSet, n) {
findFirstOfRange(delims2, detail::qfind_first_byte_of_byteset, n); findFirstOfRange(delims2, detail::qfind_first_byte_of_byteset, n);
} }
...@@ -184,10 +225,6 @@ BENCHMARK_RELATIVE(FindFirstOf4NeedlesStd, n) { ...@@ -184,10 +225,6 @@ BENCHMARK_RELATIVE(FindFirstOf4NeedlesStd, n) {
findFirstOfRange(delims4, qfind_first_byte_of_std, n); findFirstOfRange(delims4, qfind_first_byte_of_std, n);
} }
BENCHMARK_RELATIVE(FindFirstOf4NeedlesMemchr, n) {
findFirstOfRange(delims4, detail::qfind_first_byte_of_memchr, n);
}
BENCHMARK_RELATIVE(FindFirstOf4NeedlesByteSet, n) { BENCHMARK_RELATIVE(FindFirstOf4NeedlesByteSet, n) {
findFirstOfRange(delims4, detail::qfind_first_byte_of_byteset, n); findFirstOfRange(delims4, detail::qfind_first_byte_of_byteset, n);
} }
...@@ -208,10 +245,6 @@ BENCHMARK_RELATIVE(FindFirstOf8NeedlesStd, n) { ...@@ -208,10 +245,6 @@ BENCHMARK_RELATIVE(FindFirstOf8NeedlesStd, n) {
findFirstOfRange(delims8, qfind_first_byte_of_std, n); findFirstOfRange(delims8, qfind_first_byte_of_std, n);
} }
BENCHMARK_RELATIVE(FindFirstOf8NeedlesMemchr, n) {
findFirstOfRange(delims8, detail::qfind_first_byte_of_memchr, n);
}
BENCHMARK_RELATIVE(FindFirstOf8NeedlesByteSet, n) { BENCHMARK_RELATIVE(FindFirstOf8NeedlesByteSet, n) {
findFirstOfRange(delims8, detail::qfind_first_byte_of_byteset, n); findFirstOfRange(delims8, detail::qfind_first_byte_of_byteset, n);
} }
...@@ -232,10 +265,6 @@ BENCHMARK_RELATIVE(FindFirstOf16NeedlesStd, n) { ...@@ -232,10 +265,6 @@ BENCHMARK_RELATIVE(FindFirstOf16NeedlesStd, n) {
findFirstOfRange(delims16, qfind_first_byte_of_std, n); findFirstOfRange(delims16, qfind_first_byte_of_std, n);
} }
BENCHMARK_RELATIVE(FindFirstOf16NeedlesMemchr, n) {
findFirstOfRange(delims16, detail::qfind_first_byte_of_memchr, n);
}
BENCHMARK_RELATIVE(FindFirstOf16NeedlesByteSet, n) { BENCHMARK_RELATIVE(FindFirstOf16NeedlesByteSet, n) {
findFirstOfRange(delims16, detail::qfind_first_byte_of_byteset, n); findFirstOfRange(delims16, detail::qfind_first_byte_of_byteset, n);
} }
...@@ -256,10 +285,6 @@ BENCHMARK_RELATIVE(FindFirstOf32NeedlesStd, n) { ...@@ -256,10 +285,6 @@ BENCHMARK_RELATIVE(FindFirstOf32NeedlesStd, n) {
findFirstOfRange(delims32, qfind_first_byte_of_std, n); findFirstOfRange(delims32, qfind_first_byte_of_std, n);
} }
BENCHMARK_RELATIVE(FindFirstOf32NeedlesMemchr, n) {
findFirstOfRange(delims32, detail::qfind_first_byte_of_memchr, n);
}
BENCHMARK_RELATIVE(FindFirstOf32NeedlesByteSet, n) { BENCHMARK_RELATIVE(FindFirstOf32NeedlesByteSet, n) {
findFirstOfRange(delims32, detail::qfind_first_byte_of_byteset, n); findFirstOfRange(delims32, detail::qfind_first_byte_of_byteset, n);
} }
...@@ -281,10 +306,6 @@ BENCHMARK_RELATIVE(FindFirstOf64NeedlesStd, n) { ...@@ -281,10 +306,6 @@ BENCHMARK_RELATIVE(FindFirstOf64NeedlesStd, n) {
findFirstOfRange(delims64, qfind_first_byte_of_std, n); findFirstOfRange(delims64, qfind_first_byte_of_std, n);
} }
BENCHMARK_RELATIVE(FindFirstOf64NeedlesMemchr, n) {
findFirstOfRange(delims64, detail::qfind_first_byte_of_memchr, n);
}
BENCHMARK_RELATIVE(FindFirstOf64NeedlesByteSet, n) { BENCHMARK_RELATIVE(FindFirstOf64NeedlesByteSet, n) {
findFirstOfRange(delims64, detail::qfind_first_byte_of_byteset, n); findFirstOfRange(delims64, detail::qfind_first_byte_of_byteset, n);
} }
...@@ -312,16 +333,30 @@ BENCHMARK_RELATIVE(FindFirstOfRandomStd, n) { ...@@ -312,16 +333,30 @@ BENCHMARK_RELATIVE(FindFirstOfRandomStd, n) {
findFirstOfRandom(qfind_first_byte_of_std, n); findFirstOfRandom(qfind_first_byte_of_std, n);
} }
BENCHMARK_RELATIVE(FindFirstOfRandomMemchr, n) {
findFirstOfRandom(detail::qfind_first_byte_of_memchr, n);
}
BENCHMARK_RELATIVE(FindFirstOfRandomByteSet, n) { BENCHMARK_RELATIVE(FindFirstOfRandomByteSet, n) {
findFirstOfRandom(detail::qfind_first_byte_of_byteset, n); findFirstOfRandom(detail::qfind_first_byte_of_byteset, n);
} }
BENCHMARK_DRAW_LINE(); BENCHMARK_DRAW_LINE();
BENCHMARK(CountDelimsBase, n) {
countHits(detail::qfind_first_byte_of, n);
}
BENCHMARK_RELATIVE(CountDelimsNoSSE, n) {
countHits(detail::qfind_first_byte_of_nosse, n);
}
BENCHMARK_RELATIVE(CountDelimsStd, n) {
countHits(qfind_first_byte_of_std, n);
}
BENCHMARK_RELATIVE(CountDelimsByteSet, n) {
countHits(detail::qfind_first_byte_of_byteset, n);
}
BENCHMARK_DRAW_LINE();
BENCHMARK(FindFirstOfOffsetRange, n) { BENCHMARK(FindFirstOfOffsetRange, n) {
StringPiece haystack(str); StringPiece haystack(str);
folly::StringPiece needles("bc"); folly::StringPiece needles("bc");
...@@ -342,6 +377,7 @@ int main(int argc, char** argv) { ...@@ -342,6 +377,7 @@ int main(int argc, char** argv) {
for (int len : {1, 8, 10, 16, 32, 64, 128, 256, 10*1024, 1024*1024}) { for (int len : {1, 8, 10, 16, 32, 64, 128, 256, 10*1024, 1024*1024}) {
initStr(len); initStr(len);
initDelims(len); initDelims(len);
initFile(len);
runBenchmarks(); runBenchmarks();
} }
return 0; return 0;
......
...@@ -34,9 +34,6 @@ ...@@ -34,9 +34,6 @@
namespace folly { namespace detail { namespace folly { namespace detail {
// declaration of functions in Range.cpp // declaration of functions in Range.cpp
size_t qfind_first_byte_of_memchr(const StringPiece haystack,
const StringPiece needles);
size_t qfind_first_byte_of_byteset(const StringPiece haystack, size_t qfind_first_byte_of_byteset(const StringPiece haystack,
const StringPiece needles); const StringPiece needles);
...@@ -850,19 +847,14 @@ struct NoSseNeedleFinder { ...@@ -850,19 +847,14 @@ struct NoSseNeedleFinder {
} }
}; };
struct MemchrNeedleFinder {
static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) {
return detail::qfind_first_byte_of_memchr(haystack, needles);
}
};
struct ByteSetNeedleFinder { struct ByteSetNeedleFinder {
static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) { static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) {
return detail::qfind_first_byte_of_byteset(haystack, needles); return detail::qfind_first_byte_of_byteset(haystack, needles);
} }
}; };
typedef ::testing::Types<SseNeedleFinder, NoSseNeedleFinder, MemchrNeedleFinder, typedef ::testing::Types<SseNeedleFinder,
NoSseNeedleFinder,
ByteSetNeedleFinder> NeedleFinders; ByteSetNeedleFinder> NeedleFinders;
TYPED_TEST_CASE(NeedleFinderTest, NeedleFinders); TYPED_TEST_CASE(NeedleFinderTest, NeedleFinders);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment