Commit 9cc86052 authored by Vojin Katic's avatar Vojin Katic Committed by Anton Likhtarov

folly::gen::splitByLine

Summary:
I made it work, but please send feedback on how to improve the code quality.

splitByLine will split on \r, \n, and \r\n.

Test Plan: add new test, arc unit

Reviewed By: tjackson@fb.com

Subscribers: folly@lists, crawler-diffs@

FB internal diff: D1322212
parent cd5e0d07
......@@ -26,6 +26,39 @@ namespace folly {
namespace gen {
namespace detail {
/**
 * Split `in` at the first occurrence of the (possibly multi-character)
 * `delimiter`.
 *
 * On a match, `prefix` receives the text preceding the delimiter, `in` is
 * advanced past the delimiter, and true is returned. When the delimiter is
 * not found, `prefix` is cleared, `in` is left unchanged, and false is
 * returned.
 *
 * NOTE(review): an empty delimiter presumably matches at offset 0 without
 * consuming any input -- confirm callers never pass one.
 */
inline bool splitPrefix(StringPiece& in,
                        StringPiece& prefix,
                        StringPiece delimiter) {
  const auto matchPos = in.find(delimiter);
  if (matchPos == std::string::npos) {
    // No delimiter left: report failure and hand back an empty prefix.
    prefix.clear();
    return false;
  }
  prefix.assign(in.data(), in.data() + matchPos);
  // Skip both the prefix and the delimiter that terminated it.
  in.advance(matchPos + delimiter.size());
  return true;
}
/**
 * Split `in` at the first end-of-line terminator: "\r", "\n", or "\r\n".
 *
 * On a match, `prefix` receives the text preceding the terminator, `in` is
 * advanced past the terminator (a "\r\n" pair is consumed as one unit), and
 * true is returned. When no terminator is present, `prefix` is cleared, `in`
 * is left unchanged, and false is returned.
 *
 * The unnamed MixedNewlines argument only selects this overload.
 */
inline bool splitPrefix(StringPiece& in,
                        StringPiece& prefix,
                        MixedNewlines) {
  // Serves both as the character set to search for and as the two-character
  // sequence to strip.
  const char* crlf = "\r\n";
  const auto eolPos = in.find_first_of(crlf);
  if (eolPos == std::string::npos) {
    prefix.clear();
    return false;
  }
  prefix.assign(in.data(), in.data() + eolPos);
  in.advance(eolPos);
  // `in` now starts at the terminator: drop a full "\r\n" if present,
  // otherwise drop the single '\r' or '\n'.
  if (!in.removePrefix(crlf)) {
    in.advance(1);
  }
  return true;
}
inline bool splitPrefix(StringPiece& in, StringPiece& prefix, char delimiter) {
auto p = static_cast<const char*>(memchr(in.data(), delimiter, in.size()));
if (p) {
......@@ -128,14 +161,16 @@ class StringResplitter : public Operator<StringResplitter> {
}
};
class SplitStringSource : public GenImpl<StringPiece, SplitStringSource> {
template <class DelimiterType = char>
class SplitStringSource
: public GenImpl<StringPiece, SplitStringSource<DelimiterType>> {
StringPiece source_;
char delimiter_;
DelimiterType delimiter_;
public:
SplitStringSource(const StringPiece& source,
char delimiter)
DelimiterType delimiter)
: source_(source)
, delimiter_(delimiter) { }
, delimiter_(std::move(delimiter)) { }
template <class Body>
bool apply(Body&& body) const {
......@@ -166,7 +201,7 @@ template<class Delimiter,
class Unsplit : public Operator<Unsplit<Delimiter, Output>> {
Delimiter delimiter_;
public:
Unsplit(const Delimiter& delimiter)
explicit Unsplit(const Delimiter& delimiter)
: delimiter_(delimiter) {
}
......
/*
* Copyright 2013 Facebook, Inc.
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -25,6 +25,8 @@ namespace gen {
namespace detail {
class StringResplitter;
template<class Delimiter>
class SplitStringSource;
template<class Delimiter, class Output>
......@@ -54,11 +56,30 @@ S resplit(char delimiter) {
return S(delimiter);
}
template <class S=detail::SplitStringSource>
/**
 * Build a split source over `source` using a single-character `delimiter`.
 *
 * Returns an `S` constructed from (source, delimiter); `S` defaults to
 * detail::SplitStringSource<char>.
 */
template <class S = detail::SplitStringSource<char>>
S split(const StringPiece& source, char delimiter) {
return S(source, delimiter);
}
/**
 * Build a split source over `source` using a string `delimiter`, which may be
 * longer than one character (e.g. ", ").
 *
 * Returns an `S` constructed from (source, delimiter); `S` defaults to
 * detail::SplitStringSource<StringPiece>.
 */
template <class S = detail::SplitStringSource<StringPiece>>
S split(StringPiece source, StringPiece delimiter) {
return S(source, delimiter);
}
/**
 * Tag type standing for the EOL terms ("\r", "\n", or "\r\n").
 *
 * Carries no state; lines() passes a default-constructed instance as the
 * delimiter of its split source.
 */
class MixedNewlines {};
/**
 * Split by EOL ("\r", "\n", or "\r\n").
 *
 * Returns an `S` constructed from (source, MixedNewlines{}); `S` defaults to
 * detail::SplitStringSource<MixedNewlines>.
 * @see split().
 */
template <class S = detail::SplitStringSource<MixedNewlines>>
S lines(StringPiece source) {
return S(source, MixedNewlines{});
}
/*
* Joins a sequence of tokens into a string, with the chosen delimiter.
*
......
......@@ -82,13 +82,36 @@ TEST(StringGen, Split) {
}
{
auto pieces = split("hello,, world, goodbye, meow", ',')
auto pieces = split("hello,, world, goodbye, meow", ",")
| take(5) | collect;
EXPECT_EQ(5, pieces.size());
EXPECT_EQ("hello", pieces[0]);
EXPECT_EQ("", pieces[1]);
EXPECT_EQ(" world", pieces[2]);
}
{
auto pieces = split("hello,, world, goodbye, meow", ", ")
| collect;
EXPECT_EQ(4, pieces.size());
EXPECT_EQ("hello,", pieces[0]);
EXPECT_EQ("world", pieces[1]);
EXPECT_EQ("goodbye", pieces[2]);
EXPECT_EQ("meow", pieces[3]);
}
}
// Checks that lines() treats "\n", "\r\n" and a lone "\r" each as a single
// line terminator, and that consecutive terminators yield an empty line.
TEST(StringGen, SplitByNewLine) {
  auto collect = eachTo<std::string>() | as<vector>();
  {
    auto pieces = lines("hello\n\n world\r\n goodbye\r meow") | collect;
    // Fatal assertion: the element checks below index pieces[0..4], so stop
    // here rather than read out of bounds when the count is wrong.
    ASSERT_EQ(5, pieces.size());
    EXPECT_EQ("hello", pieces[0]);
    EXPECT_EQ("", pieces[1]);
    EXPECT_EQ(" world", pieces[2]);
    EXPECT_EQ(" goodbye", pieces[3]);
    EXPECT_EQ(" meow", pieces[4]);
  }
}
TEST(StringGen, EmptyResplit) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment