Commit 6b0452df authored by Tom Jackson's avatar Tom Jackson Committed by Sara Golemon

Fixed-size split()

Summary:
There are quite a few places where we split strings into a fixed number
of fields. This enables this to be done a bit faster by not using any
variable-length structures at runtime.

Test Plan: Unit tests, Benchmarks

Reviewed By: philipp@fb.com

FB internal diff: D794523
parent 6cee4613
...@@ -347,6 +347,39 @@ template<class String> StringPiece prepareDelim(const String& s) { ...@@ -347,6 +347,39 @@ template<class String> StringPiece prepareDelim(const String& s) {
} }
inline char prepareDelim(char c) { return c; } inline char prepareDelim(char c) { return c; }
template<bool exact,
class Delim>
bool splitFixed(const Delim& delimiter,
StringPiece input,
StringPiece& out) {
if (exact && UNLIKELY(std::string::npos != input.find(delimiter))) {
return false;
}
out = input;
return true;
}
template<bool exact,
class Delim,
class... StringPieces>
bool splitFixed(const Delim& delimiter,
StringPiece input,
StringPiece& outHead,
StringPieces&... outTail) {
size_t cut = input.find(delimiter);
if (UNLIKELY(cut == std::string::npos)) {
return false;
}
StringPiece head(input.begin(), input.begin() + cut);
StringPiece tail(input.begin() + cut + detail::delimSize(delimiter),
input.end());
if (LIKELY(splitFixed<exact>(delimiter, tail, outTail...))) {
outHead = head;
return true;
}
return false;
}
} }
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
...@@ -388,6 +421,20 @@ void splitTo(const Delim& delimiter, ...@@ -388,6 +421,20 @@ void splitTo(const Delim& delimiter,
ignoreEmpty); ignoreEmpty);
} }
template<bool exact,
class Delim,
class... StringPieces>
bool split(const Delim& delimiter,
StringPiece input,
StringPiece& outHead,
StringPieces&... outTail) {
return detail::splitFixed<exact>(
detail::prepareDelim(delimiter),
input,
outHead,
outTail...);
}
namespace detail { namespace detail {
template <class Iterator> template <class Iterator>
......
...@@ -394,6 +394,35 @@ void splitTo(const Delim& delimiter, ...@@ -394,6 +394,35 @@ void splitTo(const Delim& delimiter,
OutputIterator out, OutputIterator out,
bool ignoreEmpty = false); bool ignoreEmpty = false);
/*
* Split a string into a fixed number of pieces by delimiter. Returns 'true' if
* the fields were all successfully populated.
*
* Example:
*
* folly::StringPiece name, key, value;
* if (folly::split('\t', line, name, key, value))
* ...
*
* The 'exact' template paremeter specifies how the function behaves when too
* many fields are present in the input string. When 'exact' is set to its
* default value of 'true', a call to split will fail if the number of fields in
* the input string does not exactly match the number of output parameters
* passed. If 'exact' is overridden to 'false', all remaining fields will be
* stored, unsplit, in the last field, as shown below:
*
* folly::StringPiece x, y.
* if (folly::split<false>(':', "a:b:c", x, y))
* assert(x == "a" && y == "b:c");
*/
template<bool exact = true,
class Delim,
class... StringPieces>
bool split(const Delim& delimiter,
StringPiece input,
StringPiece& outHead,
StringPieces&... outTail);
/* /*
* Join list of tokens. * Join list of tokens.
* *
......
...@@ -758,6 +758,49 @@ TEST(Split, pieces_fbvector) { ...@@ -758,6 +758,49 @@ TEST(Split, pieces_fbvector) {
piecesTest<folly::fbvector>(); piecesTest<folly::fbvector>();
} }
TEST(Split, fixed) {
StringPiece a, b, c, d;
EXPECT_TRUE(folly::split<false>('.', "a.b.c.d", a, b, c, d));
EXPECT_TRUE(folly::split<false>('.', "a.b.c", a, b, c));
EXPECT_TRUE(folly::split<false>('.', "a.b", a, b));
EXPECT_TRUE(folly::split<false>('.', "a", a));
EXPECT_TRUE(folly::split('.', "a.b.c.d", a, b, c, d));
EXPECT_TRUE(folly::split('.', "a.b.c", a, b, c));
EXPECT_TRUE(folly::split('.', "a.b", a, b));
EXPECT_TRUE(folly::split('.', "a", a));
EXPECT_TRUE(folly::split<false>('.', "a.b.c", a, b, c));
EXPECT_EQ("a", a);
EXPECT_EQ("b", b);
EXPECT_EQ("c", c);
EXPECT_FALSE(folly::split<false>('.', "a.b", a, b, c));
EXPECT_TRUE(folly::split<false>('.', "a.b.c", a, b));
EXPECT_EQ("a", a);
EXPECT_EQ("b.c", b);
EXPECT_TRUE(folly::split('.', "a.b.c", a, b, c));
EXPECT_EQ("a", a);
EXPECT_EQ("b", b);
EXPECT_EQ("c", c);
EXPECT_FALSE(folly::split('.', "a.b.c", a, b));
EXPECT_FALSE(folly::split('.', "a.b", a, b, c));
EXPECT_TRUE(folly::split<false>('.', "a.b", a, b));
EXPECT_EQ("a", a);
EXPECT_EQ("b", b);
EXPECT_FALSE(folly::split<false>('.', "a", a, b));
EXPECT_TRUE(folly::split<false>('.', "a.b", a));
EXPECT_EQ("a.b", a);
EXPECT_TRUE(folly::split('.', "a.b", a, b));
EXPECT_EQ("a", a);
EXPECT_EQ("b", b);
EXPECT_FALSE(folly::split('.', "a", a, b));
EXPECT_FALSE(folly::split('.', "a.b", a));
}
TEST(String, join) { TEST(String, join) {
string output; string output;
...@@ -869,6 +912,22 @@ BENCHMARK(splitOnSingleChar, iters) { ...@@ -869,6 +912,22 @@ BENCHMARK(splitOnSingleChar, iters) {
} }
} }
BENCHMARK(splitOnSingleCharFixed, iters) {
static const std::string line = "one:two:three:four";
for (int i = 0; i < iters << 4; ++i) {
StringPiece a, b, c, d;
folly::split(':', line, a, b, c, d);
}
}
BENCHMARK(splitOnSingleCharFixedAllowExtra, iters) {
static const std::string line = "one:two:three:four";
for (int i = 0; i < iters << 4; ++i) {
StringPiece a, b, c, d;
folly::split<false>(':', line, a, b, c, d);
}
}
BENCHMARK(splitStr, iters) { BENCHMARK(splitStr, iters) {
static const std::string line = "one-*-two-*-three-*-four"; static const std::string line = "one-*-two-*-three-*-four";
for (int i = 0; i < iters << 4; ++i) { for (int i = 0; i < iters << 4; ++i) {
...@@ -877,6 +936,14 @@ BENCHMARK(splitStr, iters) { ...@@ -877,6 +936,14 @@ BENCHMARK(splitStr, iters) {
} }
} }
BENCHMARK(splitStrFixed, iters) {
static const std::string line = "one-*-two-*-three-*-four";
for (int i = 0; i < iters << 4; ++i) {
StringPiece a, b, c, d;
folly::split("-*-", line, a, b, c, d);
}
}
BENCHMARK(boost_splitOnSingleChar, iters) { BENCHMARK(boost_splitOnSingleChar, iters) {
static const std::string line = "one:two:three:four"; static const std::string line = "one:two:three:four";
for (int i = 0; i < iters << 4; ++i) { for (int i = 0; i < iters << 4; ++i) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment