Commit b0193e80, authored by Tom Jackson and committed by facebook-github-bot-1

UTF8StringPiece, wrapping boost::u8_to_u32

Summary: For handling UTF8 strings better.

Reviewed By: yfeldblum

Differential Revision: D1956771

fb-gh-sync-id: e074f9f2c9b472f5e619fef25d8e17296847773c
parent d69f6a7a
......@@ -357,7 +357,6 @@ public:
return e_ - b_;
}
// Returns the number of elements in [b_, e_) by measuring the distance
// between the two iterators with std::distance.
//
// Unlike size(), which subtracts the iterators, this works for ranges whose
// iterators are not random-access (for example a UTF8Range, which decodes
// code points on the fly).
//
// There is deliberately no `assert(b_ <= e_)` precondition check here:
// relational comparison is only provided by random-access iterators, so such
// a check would prevent this method from compiling for exactly the ranges
// it exists to serve.
size_type walk_size() const {
  return std::distance(b_, e_);
}
// Reports whether the range holds no elements, i.e. whether its begin and
// end iterators compare equal.
bool empty() const {
  return b_ == e_;
}
......
......@@ -21,6 +21,7 @@
#include <stdarg.h>
#include <string>
#include <boost/type_traits.hpp>
#include <boost/regex/pending/unicode_iterator.hpp>
#ifdef FOLLY_HAVE_DEPRECATED_ASSOC
#ifdef _GLIBCXX_SYMVER
......@@ -592,6 +593,19 @@ inline void toLowerAscii(MutableStringPiece str) {
toLowerAscii(str.begin(), str.size());
}
template <class Iterator = const char*,
class Base = folly::Range<boost::u8_to_u32_iterator<Iterator>>>
class UTF8Range : public Base {
public:
/* implicit */ UTF8Range(const folly::Range<Iterator> baseRange)
: Base(boost::u8_to_u32_iterator<Iterator>(
baseRange.begin(), baseRange.begin(), baseRange.end()),
boost::u8_to_u32_iterator<Iterator>(
baseRange.end(), baseRange.begin(), baseRange.end())) {}
};
// Convenience alias: UTF8Range instantiated for `const char*` iterators.
using UTF8StringPiece = UTF8Range<const char*>;
} // namespace folly
// Hook into boost's type traits
......
......@@ -1337,6 +1337,29 @@ TEST(String, whitespace) {
EXPECT_EQ("", rtrimWhitespace("\r "));
}
// Shared fixture for the UTF8StringPiece tests: ASCII text surrounding the
// single non-ASCII code point U+1F602. The tests treat byte offset 8 as the
// start of that code point and byte offset 9 as falling inside it
// (NOTE(review): this presumes the literal is encoded as UTF-8 - confirm for
// every supported compiler).
const folly::StringPiece kTestUTF8 = "This is \U0001F602 stuff!";
// Wrapping the whole fixture yields one element per code point.
TEST(UTF8StringPiece, valid_utf8) {
  UTF8StringPiece utf8(kTestUTF8);
  // size() cannot be used here: the decoded range is not random-access, so
  // the elements are counted by walking it instead.
  const auto codePointCount = utf8.walk_size();
  EXPECT_EQ(16, codePointCount);
}
// A suffix that begins exactly on a code point boundary is accepted.
TEST(UTF8StringPiece, valid_suffix) {
  const folly::StringPiece suffix = kTestUTF8.subpiece(8);
  UTF8StringPiece utf8 = suffix;
  const auto codePointCount = utf8.walk_size();
  EXPECT_EQ(8, codePointCount);
}
// A zero-length slice is accepted even when its offset lies inside a code
// point, because there is nothing to decode.
TEST(UTF8StringPiece, empty_mid_codepoint) {
  const folly::StringPiece emptySlice = kTestUTF8.subpiece(9, 0);
  UTF8StringPiece utf8 = emptySlice; // okay since it's empty
  const auto codePointCount = utf8.walk_size();
  EXPECT_EQ(0, codePointCount);
}
// Unlike the empty slice taken at the same offset, a one-byte slice starting
// at byte offset 9 must be rejected: constructing a UTF8StringPiece from it is
// expected to throw std::out_of_range.
TEST(UTF8StringPiece, invalid_mid_codepoint) {
// The slice is built inline on purpose: `UTF8StringPiece(name);` with a plain
// identifier would be parsed as a declaration rather than a construction.
EXPECT_THROW(UTF8StringPiece(kTestUTF8.subpiece(9, 1)), std::out_of_range);
}
int main(int argc, char *argv[]) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment