Commit 98889326 authored by Song Zhou's avatar Song Zhou Committed by Facebook Github Bot

Added a new variant of byLine to keep the delimiter

Summary: The new method byLineFull does not trim the delimiter, so consumers can check whether the final line ends with the delimiter.

Reviewed By: philippv, yfeldblum

Differential Revision: D5085371

fbshipit-source-id: 5045127ee11d008e3cd7d13d33bffad280fe0a7e
parent 7e04d475
...@@ -120,6 +120,15 @@ class FileWriter : public Operator<FileWriter> { ...@@ -120,6 +120,15 @@ class FileWriter : public Operator<FileWriter> {
std::unique_ptr<IOBuf> buffer_; std::unique_ptr<IOBuf> buffer_;
}; };
// Shared pipeline behind the by-line readers: takes the generator produced by
// fromFile(), views each emitted chunk as a StringPiece, and re-splits the
// stream on `delim`. `keepDelimiter` is forwarded unchanged to resplit(); when
// it is true the delimiter stays attached to each piece, otherwise it is
// removed.
inline auto byLineImpl(File file, char delim, bool keepDelimiter)
    -> decltype(
        fromFile(std::move(file)) | eachAs<StringPiece>() |
        resplit(delim, keepDelimiter)) {
  auto chunks = fromFile(std::move(file));
  return std::move(chunks) | eachAs<StringPiece>() |
      resplit(delim, keepDelimiter);
}
} // !detail } // !detail
/** /**
...@@ -127,13 +136,24 @@ class FileWriter : public Operator<FileWriter> { ...@@ -127,13 +136,24 @@ class FileWriter : public Operator<FileWriter> {
* Note: This produces StringPieces which reference temporary strings which are * Note: This produces StringPieces which reference temporary strings which are
* only valid during iteration. * only valid during iteration.
*/ */
// Variant of byLine() that asks detail::byLineImpl to keep the delimiter on
// each piece, so a caller can tell whether the final piece was terminated by
// `delim`.
inline auto byLineFull(File file, char delim = '\n')
    -> decltype(
        detail::byLineImpl(std::move(file), delim, /* keepDelimiter = */ true)) {
  return detail::byLineImpl(
      std::move(file), delim, /* keepDelimiter = */ true);
}
// Convenience overload: wraps the file descriptor `fd` in a File and forwards
// to byLineFull(File, char).
inline auto byLineFull(int fd, char delim = '\n')
    -> decltype(byLineFull(File(fd), delim)) {
  File wrapped(fd);
  return byLineFull(std::move(wrapped), delim);
}
// Convenience overload: constructs a File from the C-string `f` and forwards
// to byLineFull(File, char).
inline auto byLineFull(const char* f, char delim = '\n')
    -> decltype(byLineFull(File(f), delim)) {
  File opened(f);
  return byLineFull(std::move(opened), delim);
}
inline auto byLine(File file, char delim = '\n') inline auto byLine(File file, char delim = '\n')
-> decltype(fromFile(std::move(file)) -> decltype(detail::byLineImpl(std::move(file), delim, false)) {
| eachAs<StringPiece>() return detail::byLineImpl(std::move(file), delim, false);
| resplit(delim)) {
return fromFile(std::move(file))
| eachAs<StringPiece>()
| resplit(delim);
} }
inline auto byLine(int fd, char delim = '\n') inline auto byLine(int fd, char delim = '\n')
...@@ -141,5 +161,4 @@ inline auto byLine(int fd, char delim = '\n') ...@@ -141,5 +161,4 @@ inline auto byLine(int fd, char delim = '\n')
inline auto byLine(const char* f, char delim = '\n') inline auto byLine(const char* f, char delim = '\n')
-> decltype(byLine(File(f), delim)) { return byLine(File(f), delim); } -> decltype(byLine(File(f), delim)) { return byLine(File(f), delim); }
}} // !folly::gen }} // !folly::gen
...@@ -213,16 +213,23 @@ namespace detail { ...@@ -213,16 +213,23 @@ namespace detail {
class StringResplitter : public Operator<StringResplitter> { class StringResplitter : public Operator<StringResplitter> {
char delimiter_; char delimiter_;
bool keepDelimiter_;
public: public:
explicit StringResplitter(char delimiter) : delimiter_(delimiter) { } explicit StringResplitter(char delimiter, bool keepDelimiter = false)
: delimiter_(delimiter), keepDelimiter_(keepDelimiter) {}
template <class Source> template <class Source>
class Generator : public GenImpl<StringPiece, Generator<Source>> { class Generator : public GenImpl<StringPiece, Generator<Source>> {
Source source_; Source source_;
char delimiter_; char delimiter_;
bool keepDelimiter_;
public: public:
Generator(Source source, char delimiter) Generator(Source source, char delimiter, bool keepDelimiter)
: source_(std::move(source)), delimiter_(delimiter) { } : source_(std::move(source)),
delimiter_(delimiter),
keepDelimiter_(keepDelimiter) {}
template <class Body> template <class Body>
bool apply(Body&& body) const { bool apply(Body&& body) const {
...@@ -236,7 +243,9 @@ class StringResplitter : public Operator<StringResplitter> { ...@@ -236,7 +243,9 @@ class StringResplitter : public Operator<StringResplitter> {
if (s.back() != this->delimiter_) { if (s.back() != this->delimiter_) {
return body(s); return body(s);
} }
s.pop_back(); // Remove the 1-character delimiter if (!keepDelimiter_) {
s.pop_back(); // Remove the 1-character delimiter
}
return body(s); return body(s);
}); });
if (!source_.apply(splitter)) { if (!source_.apply(splitter)) {
...@@ -252,14 +261,14 @@ class StringResplitter : public Operator<StringResplitter> { ...@@ -252,14 +261,14 @@ class StringResplitter : public Operator<StringResplitter> {
class Value, class Value,
class Gen = Generator<Source>> class Gen = Generator<Source>>
Gen compose(GenImpl<Value, Source>&& source) const { Gen compose(GenImpl<Value, Source>&& source) const {
return Gen(std::move(source.self()), delimiter_); return Gen(std::move(source.self()), delimiter_, keepDelimiter_);
} }
template<class Source, template<class Source,
class Value, class Value,
class Gen = Generator<Source>> class Gen = Generator<Source>>
Gen compose(const GenImpl<Value, Source>& source) const { Gen compose(const GenImpl<Value, Source>& source) const {
return Gen(source.self(), delimiter_); return Gen(source.self(), delimiter_, keepDelimiter_);
} }
}; };
......
...@@ -54,9 +54,9 @@ class SplitTo; ...@@ -54,9 +54,9 @@ class SplitTo;
*/ */
// make this a template so we don't require StringResplitter to be complete // make this a template so we don't require StringResplitter to be complete
// until use // until use
template <class S=detail::StringResplitter> template <class S = detail::StringResplitter>
S resplit(char delimiter) { S resplit(char delimiter, bool keepDelimiter = false) {
return S(delimiter); return S(delimiter, keepDelimiter);
} }
template <class S = detail::SplitStringSource<char>> template <class S = detail::SplitStringSource<char>>
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <folly/Array.h>
#include <folly/File.h> #include <folly/File.h>
#include <folly/Range.h> #include <folly/Range.h>
#include <folly/experimental/TestUtil.h> #include <folly/experimental/TestUtil.h>
...@@ -56,7 +57,34 @@ TEST(FileGen, ByLine) { ...@@ -56,7 +57,34 @@ TEST(FileGen, ByLine) {
} }
} }
class FileGenBufferedTest : public ::testing::TestWithParam<int> { }; TEST(FileGen, ByLineFull) {
auto cases = std::vector<std::string> {
stripLeftMargin(R"(
Hello world
This is the second line
a few empty lines above
incomplete last line)"),
"complete last line\n",
"\n",
""};
for (auto& lines : cases) {
test::TemporaryFile file("ByLineFull");
EXPECT_EQ(lines.size(), write(file.fd(), lines.data(), lines.size()));
auto found =
byLineFull(file.path().string().c_str()) | unsplit<std::string>("");
EXPECT_EQ(lines, found);
}
}
class FileGenBufferedTest : public ::testing::TestWithParam<int> {};
TEST_P(FileGenBufferedTest, FileWriter) { TEST_P(FileGenBufferedTest, FileWriter) {
size_t bufferSize = GetParam(); size_t bufferSize = GetParam();
......
...@@ -260,6 +260,30 @@ TEST(StringGen, Resplit) { ...@@ -260,6 +260,30 @@ TEST(StringGen, Resplit) {
} }
} }
// Checks resplit(',', true): every piece except an unterminated final one
// must still carry its trailing ',' after splitting.
TEST(StringGen, ResplitKeepDelimiter) {
  auto toStrings = eachTo<std::string>() | as<vector>();

  // Both inputs below concatenate to the same text, so both must split into
  // exactly these pieces.
  const vector<std::string> expected = {
      "hello,", ",", " world,", " goodbye,", " meow"};

  {
    // The whole text arrives as a single chunk.
    auto chunks = from({"hello,, world, goodbye, meow"}) |
        resplit(',', true) | toStrings;
    ASSERT_EQ(5, chunks.size());
    for (size_t i = 0; i < expected.size(); ++i) {
      EXPECT_EQ(expected[i], chunks[i]);
    }
  }

  {
    // The same text arrives in fragments whose boundaries do not line up
    // with the delimiters.
    auto chunks = from({"hel", "lo,", ", world", ", goodbye, m", "eow"}) |
        resplit(',', true) | toStrings;
    ASSERT_EQ(5, chunks.size());
    for (size_t i = 0; i < expected.size(); ++i) {
      EXPECT_EQ(expected[i], chunks[i]);
    }
  }
}
void checkResplitMaxLength(vector<string> ins, void checkResplitMaxLength(vector<string> ins,
char delim, char delim,
uint64_t maxLength, uint64_t maxLength,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment