Commit 309ac912 authored by Christopher Dykes, committed by Facebook Github Bot

Use UTF-8 strings for strings with multi-byte Unicode code points in them

Summary: MSVC doesn't support string literals containing multi-byte Unicode code points unless they are UTF-8 (`u8`) string literals.

Reviewed By: yfeldblum

Differential Revision: D3507197

fbshipit-source-id: 27bff1efee03180716418fbfa9ef98f9c04929d9
parent 5f78f71a
...@@ -24,15 +24,15 @@ using folly::parseJson; ...@@ -24,15 +24,15 @@ using folly::parseJson;
using folly::toJson; using folly::toJson;
TEST(Json, Unicode) { TEST(Json, Unicode) {
auto val = parseJson("\"I \u2665 UTF-8\""); auto val = parseJson(u8"\"I \u2665 UTF-8\"");
EXPECT_EQ("I \u2665 UTF-8", val.asString()); EXPECT_EQ(u8"I \u2665 UTF-8", val.asString());
val = parseJson("\"I \\u2665 UTF-8\""); val = parseJson("\"I \\u2665 UTF-8\"");
EXPECT_EQ("I \u2665 UTF-8", val.asString()); EXPECT_EQ(u8"I \u2665 UTF-8", val.asString());
val = parseJson("\"I \U0001D11E playing in G-clef\""); val = parseJson(u8"\"I \U0001D11E playing in G-clef\"");
EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString()); EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString());
val = parseJson("\"I \\uD834\\uDD1E playing in G-clef\""); val = parseJson("\"I \\uD834\\uDD1E playing in G-clef\"");
EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString()); EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString());
} }
TEST(Json, Parse) { TEST(Json, Parse) {
...@@ -258,7 +258,7 @@ TEST(Json, JsonNonAsciiEncoding) { ...@@ -258,7 +258,7 @@ TEST(Json, JsonNonAsciiEncoding) {
TEST(Json, UTF8Retention) { TEST(Json, UTF8Retention) {
// test retention with valid utf8 strings // test retention with valid utf8 strings
std::string input = "\u2665"; std::string input = u8"\u2665";
std::string jsonInput = folly::toJson(input); std::string jsonInput = folly::toJson(input);
std::string output = folly::parseJson(jsonInput).asString(); std::string output = folly::parseJson(jsonInput).asString();
std::string jsonOutput = folly::toJson(output); std::string jsonOutput = folly::toJson(output);
...@@ -280,7 +280,7 @@ TEST(Json, UTF8EncodeNonAsciiRetention) { ...@@ -280,7 +280,7 @@ TEST(Json, UTF8EncodeNonAsciiRetention) {
opts.encode_non_ascii = true; opts.encode_non_ascii = true;
// test encode_non_ascii valid utf8 strings // test encode_non_ascii valid utf8 strings
std::string input = "\u2665"; std::string input = u8"\u2665";
std::string jsonInput = folly::json::serialize(input, opts); std::string jsonInput = folly::json::serialize(input, opts);
std::string output = folly::parseJson(jsonInput).asString(); std::string output = folly::parseJson(jsonInput).asString();
std::string jsonOutput = folly::json::serialize(output, opts); std::string jsonOutput = folly::json::serialize(output, opts);
...@@ -313,12 +313,15 @@ TEST(Json, UTF8Validation) { ...@@ -313,12 +313,15 @@ TEST(Json, UTF8Validation) {
EXPECT_ANY_THROW(folly::json::serialize("a\xe0\xa0\x80z\xe0\x80\x80", opts)); EXPECT_ANY_THROW(folly::json::serialize("a\xe0\xa0\x80z\xe0\x80\x80", opts));
opts.skip_invalid_utf8 = true; opts.skip_invalid_utf8 = true;
EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts), EXPECT_EQ(
"\"a\xe0\xa0\x80z\ufffd\ufffd\""); folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts), u8"\"a\xe0\xa0\x80z\ufffd\ufffd\"");
"\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\""); EXPECT_EQ(
EXPECT_EQ(folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts), folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts),
"\"z\ufffd\ufffdz\xe0\xa0\x80\""); u8"\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\"");
EXPECT_EQ(
folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts),
u8"\"z\ufffd\ufffdz\xe0\xa0\x80\"");
opts.encode_non_ascii = true; opts.encode_non_ascii = true;
EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts), EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
......
...@@ -1333,7 +1333,7 @@ TEST(String, stripLeftMargin_no_post_whitespace) { ...@@ -1333,7 +1333,7 @@ TEST(String, stripLeftMargin_no_post_whitespace) {
EXPECT_EQ(expected, stripLeftMargin(input)); EXPECT_EQ(expected, stripLeftMargin(input));
} }
const folly::StringPiece kTestUTF8 = "This is \U0001F602 stuff!"; const folly::StringPiece kTestUTF8 = u8"This is \U0001F602 stuff!";
TEST(UTF8StringPiece, valid_utf8) { TEST(UTF8StringPiece, valid_utf8) {
folly::StringPiece sp = kTestUTF8; folly::StringPiece sp = kTestUTF8;
...@@ -1357,7 +1357,7 @@ TEST(UTF8StringPiece, invalid_mid_codepoint) { ...@@ -1357,7 +1357,7 @@ TEST(UTF8StringPiece, invalid_mid_codepoint) {
} }
TEST(UTF8StringPiece, valid_implicit_conversion) { TEST(UTF8StringPiece, valid_implicit_conversion) {
std::string input = "\U0001F602\U0001F602\U0001F602"; std::string input = u8"\U0001F602\U0001F602\U0001F602";
auto checkImplicitCtor = [](UTF8StringPiece implicitCtor) { auto checkImplicitCtor = [](UTF8StringPiece implicitCtor) {
return implicitCtor.walk_size(); return implicitCtor.walk_size();
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment