Commit d6ed81ca authored by Marcelo Juchem's avatar Marcelo Juchem Committed by Sara Golemon

split_step - tokenization make simple in folly::Range

Summary: this is a simple method that allows a folly::Range `[b, e)` to be split on a character at position `i` (where b <= i < e) in an incremental manner.
@override-unit-failures

Test Plan: unit tests added

Reviewed By: andrei.alexandrescu@fb.com

FB internal diff: D1145631
parent 8cb35a9e
/*
* Copyright 2013 Facebook, Inc.
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -508,6 +508,93 @@ public:
return endsWith(suffix) && (--e_, true);
}
/**
* Splits this `Range` `[b, e)` in the position `i` dictated by the next
* occurence of `delimiter`.
*
* Returns a new `Range` `[b, i)` and adjusts this range to start right after
* the delimiter's position. This range will be empty if the delimiter is not
* found. If called on an empty `Range`, both this and the returned `Range`
* will be empty.
*
* Example:
*
* folly::StringPiece s("sample string for split_next");
* auto p = s.split_step(' ');
*
* // prints "sample"
* cout << s << endl;
*
* // prints "string for split_next"
* cout << p << endl;
*
* Example 2:
*
* void tokenize(StringPiece s, char delimiter) {
* while (!s.empty()) {
* cout << s.split_step(delimiter);
* }
* }
*
* @author: Marcelo Juchem <marcelo@fb.com>
*/
Range split_step(value_type delimiter) {
auto i = std::find(b_, e_, delimiter);
Range result(b_, i);
b_ = i == e_ ? e_ : std::next(i);
return result;
}
Range split_step(Range delimiter) {
auto i = find(delimiter);
Range result(b_, i == std::string::npos ? size() : i);
b_ = result.end() == e_ ? e_ : std::next(result.end(), delimiter.size());
return result;
}
/**
* Convenience method that calls `split_step()` and passes the result to a
* functor, returning whatever the functor does.
*
* Say you have a functor with this signature:
*
* Foo fn(Range r) { }
*
* `split_step()`'s return type will be `Foo`. It works just like:
*
* auto result = fn(myRange.split_step(' '));
*
* A functor returning `void` is also supported.
*
* Example:
*
* void do_some_parsing(folly::StringPiece s) {
* auto version = s.split_step(' ', [&](folly::StringPiece x) {
* if (x.empty()) {
* throw std::invalid_argument("empty string");
* }
* return std::strtoull(x.begin(), x.end(), 16);
* });
*
* // ...
* }
*
* @author: Marcelo Juchem <marcelo@fb.com>
*/
template <typename TProcess>
auto split_step(value_type delimiter, TProcess &&process)
-> decltype(process(std::declval<Range>()))
{ return process(split_step(delimiter)); }
template <typename TProcess>
auto split_step(Range delimiter, TProcess &&process)
-> decltype(process(std::declval<Range>()))
{ return process(split_step(delimiter)); }
private:
Iter b_, e_;
};
......
/*
* Copyright 2013 Facebook, Inc.
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -20,8 +20,9 @@
#include "folly/Range.h"
#include <limits>
#include <stdlib.h>
#include <cstdlib>
#include <string>
#include <iterator>
#include <sys/mman.h>
#include <boost/range/concepts.hpp>
#include <gtest/gtest.h>
......@@ -422,6 +423,298 @@ TEST(StringPiece, SuffixEmpty) {
EXPECT_EQ("", a);
}
TEST(StringPiece, split_step_char_delimiter) {
// 0 1 2
// 012345678901234567890123456
auto const s = "this is just a test string";
auto const e = std::next(s, std::strlen(s));
EXPECT_EQ('\0', *e);
folly::StringPiece p(s);
EXPECT_EQ(s, p.begin());
EXPECT_EQ(e, p.end());
auto x = p.split_step(' ');
EXPECT_EQ(std::next(s, 5), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("this", x);
x = p.split_step(' ');
EXPECT_EQ(std::next(s, 8), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("is", x);
x = p.split_step('u');
EXPECT_EQ(std::next(s, 10), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("j", x);
x = p.split_step(' ');
EXPECT_EQ(std::next(s, 13), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("st", x);
x = p.split_step(' ');
EXPECT_EQ(std::next(s, 14), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
x = p.split_step(' ');
EXPECT_EQ(std::next(s, 16), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("a", x);
x = p.split_step(' ');
EXPECT_EQ(std::next(s, 21), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("test", x);
x = p.split_step(' ');
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("string", x);
x = p.split_step(' ');
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
}
TEST(StringPiece, split_step_range_delimiter) {
// 0 1 2 3
// 0123456789012345678901234567890123
auto const s = "this is just a test string";
auto const e = std::next(s, std::strlen(s));
EXPECT_EQ('\0', *e);
folly::StringPiece p(s);
EXPECT_EQ(s, p.begin());
EXPECT_EQ(e, p.end());
auto x = p.split_step(" ");
EXPECT_EQ(std::next(s, 6), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("this", x);
x = p.split_step(" ");
EXPECT_EQ(std::next(s, 10), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("is", x);
x = p.split_step("u");
EXPECT_EQ(std::next(s, 12), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("j", x);
x = p.split_step(" ");
EXPECT_EQ(std::next(s, 16), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("st", x);
x = p.split_step(" ");
EXPECT_EQ(std::next(s, 18), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
x = p.split_step(" ");
EXPECT_EQ(std::next(s, 21), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("a", x);
x = p.split_step(" ");
EXPECT_EQ(std::next(s, 28), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ(" test", x);
x = p.split_step(" ");
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("string", x);
x = p.split_step(" ");
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
x = p.split_step(" ");
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
}
void split_step_with_process_noop(folly::StringPiece) {}
TEST(StringPiece, split_step_with_process_char_delimiter) {
// 0 1 2
// 012345678901234567890123456
auto const s = "this is just a test string";
auto const e = std::next(s, std::strlen(s));
EXPECT_EQ('\0', *e);
folly::StringPiece p(s);
EXPECT_EQ(s, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ(1, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 5), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("this", x);
return 1;
})));
EXPECT_EQ(2, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 8), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("is", x);
return 2;
})));
EXPECT_EQ(3, (p.split_step('u', [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 10), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("j", x);
return 3;
})));
EXPECT_EQ(4, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 13), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("st", x);
return 4;
})));
EXPECT_EQ(5, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 14), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
return 5;
})));
EXPECT_EQ(6, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 16), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("a", x);
return 6;
})));
EXPECT_EQ(7, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 21), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("test", x);
return 7;
})));
EXPECT_EQ(8, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("string", x);
return 8;
})));
EXPECT_EQ(9, (p.split_step(' ', [&](folly::StringPiece x) {
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
return 9;
})));
EXPECT_TRUE((std::is_same<
void,
decltype(p.split_step(' ', split_step_with_process_noop))
>::value));
EXPECT_NO_THROW(p.split_step(' ', split_step_with_process_noop));
}
TEST(StringPiece, split_step_with_process_range_delimiter) {
// 0 1 2 3
// 0123456789012345678901234567890123
auto const s = "this is just a test string";
auto const e = std::next(s, std::strlen(s));
EXPECT_EQ('\0', *e);
folly::StringPiece p(s);
EXPECT_EQ(s, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ(1, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 6), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("this", x);
return 1;
})));
EXPECT_EQ(2, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 10), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("is", x);
return 2;
})));
EXPECT_EQ(3, (p.split_step("u", [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 12), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("j", x);
return 3;
})));
EXPECT_EQ(4, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 16), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("st", x);
return 4;
})));
EXPECT_EQ(5, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 18), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
return 5;
})));
EXPECT_EQ(6, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 21), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("a", x);
return 6;
})));
EXPECT_EQ(7, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(std::next(s, 28), p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ(" test", x);
return 7;
})));
EXPECT_EQ(8, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("string", x);
return 8;
})));
EXPECT_EQ(9, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
return 9;
})));
EXPECT_EQ(10, (p.split_step(" ", [&](folly::StringPiece x) {
EXPECT_EQ(e, p.begin());
EXPECT_EQ(e, p.end());
EXPECT_EQ("", x);
return 10;
})));
EXPECT_TRUE((std::is_same<
void,
decltype(p.split_step(' ', split_step_with_process_noop))
>::value));
EXPECT_NO_THROW(p.split_step(' ', split_step_with_process_noop));
}
TEST(qfind, UInt32_Ranges) {
vector<uint32_t> a({1, 2, 3, 260, 5});
vector<uint32_t> b({2, 3, 4});
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment