Commit 792e823b authored by Adam Simpkins's avatar Adam Simpkins Committed by Facebook Github Bot 5

add Cursor::readWhile() and skipWhile()

Summary:
Add generic functions for reading or skipping until a predicate check fails.
This will allow us to simplify a few different call sites that have their own
logic similar to this.  Also change readTerminatedString() to use
readWhile().

Reviewed By: alandau

Differential Revision: D3337581

fbshipit-source-id: 9f50914c83adfc882219046862972661bed0e72a
parent e9ebf3f4
......@@ -202,6 +202,7 @@ nobase_follyinclude_HEADERS = \
IntrusiveList.h \
io/Compression.h \
io/Cursor.h \
io/Cursor-inl.h \
io/IOBuf.h \
io/IOBufQueue.h \
io/RecordIO.h \
......
/*
* Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
#pragma once
namespace folly {
namespace io {
namespace detail {
/*
* Helper classes for use with CursorBase::readWhile()
*/
/*
 * Output sink that accumulates every chunk of bytes it is handed into a
 * single std::string.
 */
class CursorStringAppender {
 public:
  /* Append the raw bytes of `bytes` to the end of the accumulated string. */
  void append(ByteRange bytes) {
    auto const* chars = reinterpret_cast<char const*>(bytes.data());
    str_.append(chars, bytes.size());
  }

  /* Hand the accumulated string to the caller by moving it out of str_. */
  std::string extractString() {
    return std::move(str_);
  }

 private:
  std::string str_;
};
/*
 * Output sink whose append() ignores the bytes it is given.
 */
class CursorNoopAppender {
 public:
  /* Accepts a chunk of bytes and does nothing with it. */
  void append(ByteRange /* ignored */) {}
};
/*
 * Read bytes up to, but not including, the first occurrence of termChar and
 * return them as a string.  The terminator itself is skipped over but is not
 * part of the returned string.
 *
 * Throws std::length_error("string overflow") once the number of
 * non-terminator bytes seen reaches maxLength, and
 * std::out_of_range("terminator not found") if the cursor is at the end of
 * the data after reading without having seen the terminator.
 */
template <class Derived, class BufType>
std::string CursorBase<Derived, BufType>::readTerminatedString(
    char termChar,
    size_t maxLength) {
  // Compare bytes as unsigned values.  Comparing a uint8_t directly against a
  // plain char promotes both to int, so where char is signed a terminator
  // with the high bit set (a negative char) would never equal any byte.
  const auto terminator = static_cast<uint8_t>(termChar);

  size_t bytesRead{0};
  auto keepReading = [&bytesRead, terminator, maxLength](uint8_t byte) {
    if (byte == terminator) {
      return false;
    }
    // Count the byte before checking the limit: even the byte at offset 0
    // represents one character of the string.
    ++bytesRead;
    if (bytesRead >= maxLength) {
      throw std::length_error("string overflow");
    }
    return true;
  };

  auto result = readWhile(keepReading);

  // readWhile() stops either on the terminator or at the end of the data;
  // only in the former case is there a terminator byte left to skip.
  if (isAtEnd()) {
    throw std::out_of_range("terminator not found");
  }
  skip(1);

  return result;
}
/*
 * Convenience form of readWhile(): gathers the bytes accepted by `predicate`
 * into a CursorStringAppender and returns them as a std::string.
 */
template <class Derived, class BufType>
template <typename Predicate>
std::string CursorBase<Derived, BufType>::readWhile(
    const Predicate& predicate) {
  CursorStringAppender collected;
  readWhile(predicate, collected);
  return collected.extractString();
}
/*
 * Feed bytes to `out` for as long as `predicate` accepts them.
 *
 * Works one peekBytes() chunk at a time: the predicate is applied to each
 * byte of the chunk in order, the accepted prefix is passed to out.append(),
 * and the cursor is advanced past it.  Returns when the predicate rejects a
 * byte (that byte is left unread) or when peekBytes() yields no more data.
 */
template <class Derived, class BufType>
template <typename Predicate, typename Output>
void CursorBase<Derived, BufType>::readWhile(
    const Predicate& predicate,
    Output& out) {
  for (;;) {
    auto chunk = peekBytes();
    if (chunk.empty()) {
      // Nothing left to read.
      return;
    }

    // Count how many leading bytes of this chunk satisfy the predicate.
    size_t matched = 0;
    while (matched < chunk.size() && predicate(chunk[matched])) {
      ++matched;
    }

    if (matched < chunk.size()) {
      // The predicate rejected a byte: emit only the accepted prefix and stop
      // with the cursor positioned on the rejected byte.
      chunk.reset(chunk.data(), matched);
      out.append(chunk);
      skip(matched);
      return;
    }

    // The entire chunk matched; consume it and continue with the next one.
    out.append(chunk);
    skip(chunk.size());
  }
}
/*
 * Advance the cursor past the bytes accepted by `predicate`, discarding them.
 * Implemented as readWhile() with an output sink that ignores its input.
 */
template <class Derived, class BufType>
template <typename Predicate>
void CursorBase<Derived, BufType>::skipWhile(const Predicate& predicate) {
  CursorNoopAppender discard;
  readWhile(predicate, discard);
}
}
}
} // folly::io::detail
......@@ -229,34 +229,36 @@ class CursorBase {
*/
std::string readTerminatedString(
char termChar = '\0',
size_t maxLength = std::numeric_limits<size_t>::max()) {
std::string str;
while (!isAtEnd()) {
const uint8_t* buf = data();
size_t buflen = length();
size_t i = 0;
while (i < buflen && buf[i] != termChar) {
++i;
size_t maxLength = std::numeric_limits<size_t>::max());
// Do this check after incrementing 'i', as even though we start at the
// 0 byte, it still represents a single character
if (str.length() + i >= maxLength) {
throw std::length_error("string overflow");
}
}
/*
* Read all bytes until the specified predicate returns false.
*
* The predicate will be called on each byte in turn, until it returns false
* or until the end of the IOBuf chain is reached.
*
* Returns the result as a string.
*/
template <typename Predicate>
std::string readWhile(const Predicate& predicate);
str.append(reinterpret_cast<const char*>(buf), i);
if (i < buflen) {
skip(i + 1);
return str;
}
/*
* Read all bytes until the specified predicate returns false.
*
* This is a more generic version of readWhile() that takes an arbitrary
* Output object, and calls Output::append() with each chunk of matching data.
*/
template <typename Predicate, typename Output>
void readWhile(const Predicate& predicate, Output& out);
skip(i);
}
throw std::out_of_range("terminator not found");
}
/*
* Skip all bytes until the specified predicate returns false.
*
* The predicate will be called on each byte in turn, until it returns false
* or until the end of the IOBuf chain is reached.
*/
template <typename Predicate>
void skipWhile(const Predicate& predicate);
size_t skipAtMost(size_t len) {
if (LIKELY(length() >= len)) {
......@@ -419,7 +421,7 @@ class CursorBase {
size_t operator-(const BufType* buf) const {
size_t len = 0;
BufType *curBuf = buf;
const BufType* curBuf = buf;
while (curBuf != crtBuf_) {
len += curBuf->length();
curBuf = curBuf->next();
......@@ -934,3 +936,5 @@ class QueueAppender : public detail::Writable<QueueAppender> {
};
}} // folly::io
#include <folly/io/Cursor-inl.h>
......@@ -770,3 +770,96 @@ TEST(IOBuf, StringOperations) {
EXPECT_STREQ("hello", curs.readFixedString(5).c_str());
}
}
// Checks Cursor::readWhile() and Cursor::skipWhile() against the text
// "hello123world456", first held in a single buffer and then split across a
// chain of buffers that includes empty ones.
TEST(IOBuf, ReadWhileTrue) {
  auto isDigit = [](uint8_t ch) { return ch >= '0' && ch <= '9'; };
  auto isAlpha = [](uint8_t ch) {
    const bool lower = ch >= 'a' && ch <= 'z';
    const bool upper = ch >= 'A' && ch <= 'Z';
    return lower || upper;
  };

  // Alternating alphabetic and numeric runs, read from a single buffer.
  {
    std::unique_ptr<IOBuf> chain(IOBuf::create(32));
    Appender writer(chain.get(), 0);
    writer.push(StringPiece("hello123world456"));

    Cursor cursor(chain.get());
    EXPECT_STREQ("hello", cursor.readWhile(isAlpha).c_str());
    EXPECT_STREQ("123", cursor.readWhile(isDigit).c_str());
    EXPECT_STREQ("world", cursor.readWhile(isAlpha).c_str());
    EXPECT_STREQ("456", cursor.readWhile(isDigit).c_str());
    EXPECT_TRUE(cursor.isAtEnd());
  }

  // Same data, but the two middle runs are skipped rather than read.
  {
    std::unique_ptr<IOBuf> chain(IOBuf::create(16));
    Appender writer(chain.get(), 0);
    writer.push(StringPiece("hello123world456"));

    Cursor cursor(chain.get());
    EXPECT_STREQ("hello", cursor.readWhile(isAlpha).c_str());
    cursor.skipWhile(isDigit);
    cursor.skipWhile(isAlpha);
    EXPECT_STREQ("456", cursor.readWhile(isDigit).c_str());
    EXPECT_TRUE(cursor.isAtEnd());
  }

  // Same data split across multiple buffers, including some empty buffers in
  // the middle and at the end of the chain.
  {
    // Builds one 40-byte-capacity buffer, optionally advanced by `headroom`
    // bytes and optionally filled with `text` (nullptr leaves it empty).
    auto makeBuf = [](size_t headroom, const char* text) {
      auto buf = IOBuf::create(40);
      if (headroom > 0) {
        buf->advance(headroom);
      }
      if (text != nullptr) {
        Appender writer(buf.get(), 0);
        writer.push(StringPiece(text));
      }
      return buf;
    };

    // 1st element: "he"
    std::unique_ptr<IOBuf> chain = makeBuf(0, "he");
    // 2nd element: "ll", after 10 bytes of headroom
    chain->prependChain(makeBuf(10, "ll"));
    // 3rd element: empty, advanced by 15 bytes
    chain->prependChain(makeBuf(15, nullptr));
    // 4th element: "o12", after 37 bytes of headroom
    chain->prependChain(makeBuf(37, "o12"));
    // 5th element: "3"
    chain->prependChain(makeBuf(0, "3"));
    // 6th element: empty
    chain->prependChain(makeBuf(0, nullptr));
    // 7th element: "world456"
    chain->prependChain(makeBuf(0, "world456"));
    // 8th element: empty
    chain->prependChain(makeBuf(0, nullptr));

    Cursor cursor(chain.get());
    EXPECT_STREQ("hello", cursor.readWhile(isAlpha).c_str());
    EXPECT_STREQ("123", cursor.readWhile(isDigit).c_str());
    EXPECT_STREQ("world", cursor.readWhile(isAlpha).c_str());
    EXPECT_STREQ("456", cursor.readWhile(isDigit).c_str());
    EXPECT_TRUE(cursor.isAtEnd());
  }
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment