Commit b0193e80, authored by Tom Jackson, committed by facebook-github-bot-1

UTF8StringPiece, wrapping boost::u8_to_u32

Summary: Adds UTF8StringPiece (a UTF8Range over const char*) for better handling of UTF-8 strings.

Reviewed By: yfeldblum

Differential Revision: D1956771

fb-gh-sync-id: e074f9f2c9b472f5e619fef25d8e17296847773c
parent d69f6a7a
...@@ -357,7 +357,6 @@ public: ...@@ -357,7 +357,6 @@ public:
return e_ - b_; return e_ - b_;
} }
size_type walk_size() const { size_type walk_size() const {
assert(b_ <= e_);
return std::distance(b_, e_); return std::distance(b_, e_);
} }
bool empty() const { return b_ == e_; } bool empty() const { return b_ == e_; }
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <stdarg.h> #include <stdarg.h>
#include <string> #include <string>
#include <boost/type_traits.hpp> #include <boost/type_traits.hpp>
#include <boost/regex/pending/unicode_iterator.hpp>
#ifdef FOLLY_HAVE_DEPRECATED_ASSOC #ifdef FOLLY_HAVE_DEPRECATED_ASSOC
#ifdef _GLIBCXX_SYMVER #ifdef _GLIBCXX_SYMVER
...@@ -592,6 +593,19 @@ inline void toLowerAscii(MutableStringPiece str) { ...@@ -592,6 +593,19 @@ inline void toLowerAscii(MutableStringPiece str) {
toLowerAscii(str.begin(), str.size()); toLowerAscii(str.begin(), str.size());
} }
template <class Iterator = const char*,
class Base = folly::Range<boost::u8_to_u32_iterator<Iterator>>>
class UTF8Range : public Base {
public:
/* implicit */ UTF8Range(const folly::Range<Iterator> baseRange)
: Base(boost::u8_to_u32_iterator<Iterator>(
baseRange.begin(), baseRange.begin(), baseRange.end()),
boost::u8_to_u32_iterator<Iterator>(
baseRange.end(), baseRange.begin(), baseRange.end())) {}
};
using UTF8StringPiece = UTF8Range<const char*>;
} // namespace folly } // namespace folly
// Hook into boost's type traits // Hook into boost's type traits
......
...@@ -1337,6 +1337,29 @@ TEST(String, whitespace) { ...@@ -1337,6 +1337,29 @@ TEST(String, whitespace) {
EXPECT_EQ("", rtrimWhitespace("\r ")); EXPECT_EQ("", rtrimWhitespace("\r "));
} }
// Shared fixture for the UTF8StringPiece tests below: 8 ASCII characters
// ("This is "), the single code point U+1F602, then 7 ASCII characters
// (" stuff!") -- 16 code points in total.
// NOTE(review): the byte offsets used by the tests (8, 9) assume the literal
// is stored as UTF-8, where U+1F602 occupies 4 bytes starting at offset 8 --
// confirm the execution character set.
const folly::StringPiece kTestUTF8 = "This is \U0001F602 stuff!";
// A whole, well-formed string converts implicitly to UTF8StringPiece and is
// counted in code points rather than bytes.
TEST(UTF8StringPiece, valid_utf8) {
  const folly::StringPiece bytes = kTestUTF8;
  const UTF8StringPiece codePoints = bytes;
  // The decoded range is not random-access, so size() is unavailable and the
  // length has to be obtained by walking it.
  EXPECT_EQ(16, codePoints.walk_size());
}
// A suffix taken from byte offset 8 onwards is accepted and walks as
// 8 code points.
TEST(UTF8StringPiece, valid_suffix) {
  const folly::StringPiece suffix = kTestUTF8.subpiece(8);
  const UTF8StringPiece codePoints = suffix;
  EXPECT_EQ(8, codePoints.walk_size());
}
// A zero-length piece taken at byte offset 9 is accepted: being empty, it is
// fine even though the offset is not at the start of a code point.
TEST(UTF8StringPiece, empty_mid_codepoint) {
  const folly::StringPiece emptyPiece = kTestUTF8.subpiece(9, 0);
  const UTF8StringPiece codePoints = emptyPiece;
  EXPECT_EQ(0, codePoints.walk_size());
}
// A non-empty piece (one byte, taken at byte offset 9) must be rejected:
// merely constructing a UTF8StringPiece over it is expected to throw
// std::out_of_range, before any iteration takes place.
TEST(UTF8StringPiece, invalid_mid_codepoint) {
EXPECT_THROW(UTF8StringPiece(kTestUTF8.subpiece(9, 1)), std::out_of_range);
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment