Commit 116f13d8 authored by Giuseppe Ottaviano's avatar Giuseppe Ottaviano Committed by facebook-github-bot-1

Optimize getline(istream&, fbstring&) implementation

Summary:
Current `getline` implementation in `fbstring` always allocates, even if the passed string is already large enough. Furthermore, the growing strategy relies on outdated assumptions about the allocator.
This implementation reuses the existing allocation as much as possible, then uses exponential growth.

Reviewed By: luciang, philippv

Differential Revision: D2871976

fb-gh-sync-id: 8db9512030be3f4953efa8f008747827504c032c
parent 754568a3
......@@ -1333,10 +1333,8 @@ public:
}
basic_fbstring& append(const value_type* s, size_type n) {
#ifndef NDEBUG
Invariant checker(*this);
(void) checker;
#endif
if (FBSTRING_UNLIKELY(!n)) {
// Unlikely but must be done
return *this;
......@@ -1411,7 +1409,7 @@ public:
basic_fbstring& assign(const value_type* s, const size_type n) {
Invariant checker(*this);
(void) checker;
if (size() >= n) {
std::copy(s, s + n, begin());
resize(n);
......@@ -1473,13 +1471,56 @@ public:
return begin() + pos;
}
#ifndef _LIBSTDCXX_FBSTRING
private:
typedef std::basic_istream<value_type, traits_type> istream_type;
public:
friend inline istream_type& getline(istream_type& is,
basic_fbstring& str,
value_type delim) {
Invariant checker(str);
str.clear();
size_t size = 0;
while (true) {
size_t avail = str.capacity() - size;
// fbstring has 1 byte extra capacity for the null terminator,
// and getline null-terminates the read string.
is.getline(str.store_.expand_noinit(avail), avail + 1, delim);
size += is.gcount();
if (is.bad() || is.eof() || !is.fail()) {
// Done by either failure, end of file, or normal read.
if (!is.bad() && !is.eof()) {
--size; // gcount() also accounts for the delimiter.
}
str.resize(size);
break;
}
assert(size == str.size());
assert(size == str.capacity());
// Start at minimum allocation 63 + terminator = 64.
str.reserve(std::max<size_t>(63, 3 * size / 2));
// Clear the error so we can continue reading.
is.clear();
}
return is;
}
friend inline istream_type& getline(istream_type& is, basic_fbstring& str) {
return getline(is, str, '\n');
}
#endif
private:
template <int i> class Selector {};
iterator insertImplDiscr(const_iterator p,
size_type n, value_type c, Selector<1>) {
Invariant checker(*this);
(void) checker;
auto const pos = p - begin();
assert(p >= begin() && p <= end());
if (capacity() - size() < n) {
......@@ -1517,7 +1558,7 @@ private:
iterator insertImpl(const_iterator i,
FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) {
Invariant checker(*this);
(void) checker;
const size_type pos = i - begin();
const typename std::iterator_traits<FwdIterator>::difference_type n2 =
std::distance(s1, s2);
......@@ -1581,7 +1622,7 @@ public:
basic_fbstring& erase(size_type pos = 0, size_type n = npos) {
Invariant checker(*this);
(void) checker;
enforce(pos <= length(), std::__throw_out_of_range, "");
procrustes(n, length() - pos);
std::copy(begin() + pos + n, end(), begin() + pos);
......@@ -1635,7 +1676,7 @@ public:
basic_fbstring& replace(size_type pos, size_type n1,
StrOrLength s_or_n2, NumOrChar n_or_c) {
Invariant checker(*this);
(void) checker;
enforce(pos <= size(), std::__throw_out_of_range, "");
procrustes(n1, length() - pos);
const iterator b = begin() + pos;
......@@ -1714,7 +1755,6 @@ private:
void replaceImpl(iterator i1, iterator i2,
FwdIterator s1, FwdIterator s2, std::forward_iterator_tag) {
Invariant checker(*this);
(void) checker;
// Handle aliased replace
if (replaceAliased(i1, i2, s1, s2,
......@@ -2406,57 +2446,6 @@ operator<<(
return os;
}
#ifndef _LIBSTDCXX_FBSTRING
template <typename E, class T, class A, class S>
inline
std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
typename basic_fbstring<E, T, A, S>::traits_type>&
getline(
std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
typename basic_fbstring<E, T, A, S>::traits_type>& is,
basic_fbstring<E, T, A, S>& str,
typename basic_fbstring<E, T, A, S>::value_type delim) {
// Use the nonstandard getdelim()
char * buf = nullptr;
size_t size = 0;
for (;;) {
// This looks quadratic but it really depends on realloc
auto const newSize = size + 128;
buf = static_cast<char*>(checkedRealloc(buf, newSize));
is.getline(buf + size, newSize - size, delim);
if (is.bad() || is.eof() || !is.fail()) {
// done by either failure, end of file, or normal read
size += std::strlen(buf + size);
break;
}
// Here we have failed due to too short a buffer
// Minus one to discount the terminating '\0'
size = newSize - 1;
assert(buf[size] == 0);
// Clear the error so we can continue reading
is.clear();
}
basic_fbstring<E, T, A, S> result(buf, size, size + 1,
AcquireMallocatedString());
result.swap(str);
return is;
}
template <typename E, class T, class A, class S>
inline
std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
typename basic_fbstring<E, T, A, S>::traits_type>&
getline(
std::basic_istream<typename basic_fbstring<E, T, A, S>::value_type,
typename basic_fbstring<E, T, A, S>::traits_type>& is,
basic_fbstring<E, T, A, S>& str) {
// Just forward to the version with a delimiter
return getline(is, str, '\n');
}
#endif
template <typename E1, class T, class A, class S>
const typename basic_fbstring<E1, T, A, S>::size_type
basic_fbstring<E1, T, A, S>::npos =
......
......@@ -23,6 +23,7 @@
#include <list>
#include <fstream>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <boost/random.hpp>
......
......@@ -22,9 +22,9 @@
#include <atomic>
#include <cstdlib>
#include <list>
#include <fstream>
#include <iomanip>
#include <list>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <boost/random.hpp>
#include <gtest/gtest.h>
......@@ -1114,7 +1114,7 @@ TEST(FBString, testAllClauses) {
}
TEST(FBString, testGetline) {
fbstring s1 = "\
string s1 = "\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras accumsan \n\
elit ut urna consectetur in sagittis mi auctor. Nulla facilisi. In nec \n\
dolor leo, vitae imperdiet neque. Donec ut erat mauris, a faucibus \n\
......@@ -1132,28 +1132,17 @@ massa, ut accumsan magna. Donec imperdiet tempor nisi et \n\
laoreet. Phasellus lectus quam, ultricies ut tincidunt in, dignissim \n\
id eros. Mauris vulputate tortor nec neque pellentesque sagittis quis \n\
sed nisl. In diam lacus, lobortis ut posuere nec, ornare id quam.";
char f[] = "/tmp/fbstring_testing.XXXXXX";
int fd = mkstemp(f);
EXPECT_TRUE(fd > 0);
if (fd > 0) {
close(fd); // Yeah
std::ofstream out(f);
if (!(out << s1)) {
EXPECT_TRUE(0) << "Couldn't write to temp file.";
return;
}
}
vector<fbstring> v;
boost::split(v, s1, boost::is_any_of("\n"));
{
ifstream input(f);
istringstream input(s1);
fbstring line;
FOR_EACH (i, v) {
EXPECT_TRUE(!getline(input, line).fail());
EXPECT_EQ(line, *i);
}
}
unlink(f);
}
TEST(FBString, testMoveCtor) {
......
......@@ -250,3 +250,27 @@ void BENCHFUN(short_append)(size_t iters, size_t arg) {
}
BENCHMARK_PARAM(BENCHFUN(short_append), 23);
BENCHMARK_PARAM(BENCHFUN(short_append), 1024);
void BENCHFUN(getline)(size_t iters, size_t arg) {
string lines;
BENCHMARK_SUSPEND {
string line;
FOR_EACH_RANGE(i, 0, 512) {
randomString(&line, arg);
lines += line;
lines += '\n';
}
}
STRING line;
while (iters) {
std::istringstream is(lines);
while (iters && getline(is, line)) {
folly::doNotOptimizeAway(line.size());
iters--;
}
}
}
BENCHMARK_PARAM(BENCHFUN(getline), 23);
BENCHMARK_PARAM(BENCHFUN(getline), 1000);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment