Commit 309ac912, authored by Christopher Dykes, committed by Facebook Github Bot

Use UTF-8 strings for strings with multi-byte Unicode code points in them

Summary: MSVC doesn't support string literals containing multi-byte Unicode code points unless they are UTF-8 (`u8`) string literals.

Reviewed By: yfeldblum

Differential Revision: D3507197

fbshipit-source-id: 27bff1efee03180716418fbfa9ef98f9c04929d9
parent 5f78f71a
......@@ -24,15 +24,15 @@ using folly::parseJson;
using folly::toJson;
TEST(Json, Unicode) {
auto val = parseJson("\"I \u2665 UTF-8\"");
EXPECT_EQ("I \u2665 UTF-8", val.asString());
auto val = parseJson(u8"\"I \u2665 UTF-8\"");
EXPECT_EQ(u8"I \u2665 UTF-8", val.asString());
val = parseJson("\"I \\u2665 UTF-8\"");
EXPECT_EQ("I \u2665 UTF-8", val.asString());
val = parseJson("\"I \U0001D11E playing in G-clef\"");
EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString());
EXPECT_EQ(u8"I \u2665 UTF-8", val.asString());
val = parseJson(u8"\"I \U0001D11E playing in G-clef\"");
EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString());
val = parseJson("\"I \\uD834\\uDD1E playing in G-clef\"");
EXPECT_EQ("I \U0001D11E playing in G-clef", val.asString());
EXPECT_EQ(u8"I \U0001D11E playing in G-clef", val.asString());
}
TEST(Json, Parse) {
......@@ -258,7 +258,7 @@ TEST(Json, JsonNonAsciiEncoding) {
TEST(Json, UTF8Retention) {
// test retention with valid utf8 strings
std::string input = "\u2665";
std::string input = u8"\u2665";
std::string jsonInput = folly::toJson(input);
std::string output = folly::parseJson(jsonInput).asString();
std::string jsonOutput = folly::toJson(output);
......@@ -280,7 +280,7 @@ TEST(Json, UTF8EncodeNonAsciiRetention) {
opts.encode_non_ascii = true;
// test encode_non_ascii valid utf8 strings
std::string input = "\u2665";
std::string input = u8"\u2665";
std::string jsonInput = folly::json::serialize(input, opts);
std::string output = folly::parseJson(jsonInput).asString();
std::string jsonOutput = folly::json::serialize(output, opts);
......@@ -313,12 +313,15 @@ TEST(Json, UTF8Validation) {
EXPECT_ANY_THROW(folly::json::serialize("a\xe0\xa0\x80z\xe0\x80\x80", opts));
opts.skip_invalid_utf8 = true;
EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
"\"a\xe0\xa0\x80z\ufffd\ufffd\"");
EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts),
"\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\"");
EXPECT_EQ(folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts),
"\"z\ufffd\ufffdz\xe0\xa0\x80\"");
EXPECT_EQ(
folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
u8"\"a\xe0\xa0\x80z\ufffd\ufffd\"");
EXPECT_EQ(
folly::json::serialize("a\xe0\xa0\x80z\xc0\x80\x80", opts),
u8"\"a\xe0\xa0\x80z\ufffd\ufffd\ufffd\"");
EXPECT_EQ(
folly::json::serialize("z\xc0\x80z\xe0\xa0\x80", opts),
u8"\"z\ufffd\ufffdz\xe0\xa0\x80\"");
opts.encode_non_ascii = true;
EXPECT_EQ(folly::json::serialize("a\xe0\xa0\x80z\xc0\x80", opts),
......
......@@ -1333,7 +1333,7 @@ TEST(String, stripLeftMargin_no_post_whitespace) {
EXPECT_EQ(expected, stripLeftMargin(input));
}
const folly::StringPiece kTestUTF8 = "This is \U0001F602 stuff!";
const folly::StringPiece kTestUTF8 = u8"This is \U0001F602 stuff!";
TEST(UTF8StringPiece, valid_utf8) {
folly::StringPiece sp = kTestUTF8;
......@@ -1357,7 +1357,7 @@ TEST(UTF8StringPiece, invalid_mid_codepoint) {
}
TEST(UTF8StringPiece, valid_implicit_conversion) {
std::string input = "\U0001F602\U0001F602\U0001F602";
std::string input = u8"\U0001F602\U0001F602\U0001F602";
auto checkImplicitCtor = [](UTF8StringPiece implicitCtor) {
return implicitCtor.walk_size();
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment