Commit 049fb12e authored by Stella Lau's avatar Stella Lau Committed by Facebook Github Bot

Compress empty strings with underlying compressor

Summary:
- Current behavior compresses empty strings to empty strings. This is undesirable as decompression using underlying decompressor (side-stepping the codec) will fail. This change passes empty strings to the underlying compressor
- Decompressing empty string -> empty string was kept for backwards compatibility
- Fix `getUncompressedLength` for zlib

Reviewed By: terrelln, yfeldblum

Differential Revision: D5740034

fbshipit-source-id: 5a747ea4963dad872103209aa4410197f6c605db
parent 4fd5e900
......@@ -75,9 +75,6 @@ std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
throw std::invalid_argument("Codec: data must not be nullptr");
}
uint64_t len = data->computeChainDataLength();
if (len == 0) {
return IOBuf::create(0);
}
if (len > maxUncompressedLength()) {
throw std::runtime_error("Codec: uncompressed length too large");
}
......@@ -87,9 +84,6 @@ std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
std::string Codec::compress(const StringPiece data) {
const uint64_t len = data.size();
if (len == 0) {
return "";
}
if (len > maxUncompressedLength()) {
throw std::runtime_error("Codec: uncompressed length too large");
}
......@@ -191,9 +185,6 @@ std::string Codec::doUncompressString(
}
uint64_t Codec::maxCompressedLength(uint64_t uncompressedLength) const {
if (uncompressedLength == 0) {
return 0;
}
return doMaxCompressedLength(uncompressedLength);
}
......@@ -201,8 +192,8 @@ Optional<uint64_t> Codec::getUncompressedLength(
const folly::IOBuf* data,
Optional<uint64_t> uncompressedLength) const {
auto const compressedLength = data->computeChainDataLength();
if (uncompressedLength == uint64_t(0) || compressedLength == 0) {
if (uncompressedLength.value_or(0) != 0 || compressedLength != 0) {
if (compressedLength == 0) {
if (uncompressedLength.value_or(0) != 0) {
throw std::runtime_error("Invalid uncompressed length");
}
return 0;
......@@ -242,16 +233,12 @@ bool StreamCodec::compressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flushOp) {
if (state_ == State::RESET && input.empty()) {
if (flushOp == StreamCodec::FlushOp::NONE) {
return false;
}
if (flushOp == StreamCodec::FlushOp::END &&
uncompressedLength().value_or(0) != 0) {
throw std::runtime_error("Codec: invalid uncompressed length");
}
return true;
if (state_ == State::RESET && input.empty() &&
flushOp == StreamCodec::FlushOp::END &&
uncompressedLength().value_or(0) != 0) {
throw std::runtime_error("Codec: invalid uncompressed length");
}
if (!uncompressedLength() && needsDataLength()) {
throw std::runtime_error("Codec: uncompressed length required");
}
......@@ -1030,7 +1017,7 @@ class LZMA2StreamCodec final : public StreamCodec {
void resetCStream();
void resetDStream();
size_t decodeVarint(ByteRange& input);
bool decodeAndCheckVarint(ByteRange& input);
bool flushVarintBuffer(MutableByteRange& output);
void resetVarintBuffer();
......@@ -1243,11 +1230,16 @@ bool LZMA2StreamCodec::doCompressStream(
*
* If there are too many bytes and the varint is not valid, throw a
* runtime_error.
* Returns the decoded size or 0 if more bytes are needed.
*
* If the uncompressed length was provided and a decoded varint does not match
* the provided length, throw a runtime_error.
*
* Returns true if the varint was successfully decoded and matches the
* uncompressed length if provided, and false if more bytes are needed.
*/
size_t LZMA2StreamCodec::decodeVarint(ByteRange& input) {
bool LZMA2StreamCodec::decodeAndCheckVarint(ByteRange& input) {
if (input.empty()) {
return 0;
return false;
}
size_t const numBytesToCopy =
std::min(kMaxVarintLength64 - varintBufferPos_, input.size());
......@@ -1260,14 +1252,17 @@ size_t LZMA2StreamCodec::decodeVarint(ByteRange& input) {
if (ret.hasValue()) {
size_t const varintSize = rangeSize - range.size();
input.advance(varintSize - varintBufferPos_);
return ret.value();
if (uncompressedLength() && *uncompressedLength() != ret.value()) {
throw std::runtime_error("LZMA2StreamCodec: invalid uncompressed length");
}
return true;
} else if (ret.error() == DecodeVarintError::TooManyBytes) {
throw std::runtime_error("LZMA2StreamCodec: invalid uncompressed length");
} else {
// Too few bytes
input.advance(numBytesToCopy);
varintBufferPos_ += numBytesToCopy;
return 0;
return false;
}
}
......@@ -1288,13 +1283,9 @@ bool LZMA2StreamCodec::doUncompressStream(
if (needDecodeSize_) {
// Try decoding the varint. If the input does not contain the entire varint,
// buffer the input. If the varint can not be decoded, fail.
size_t const size = decodeVarint(input);
if (!size) {
if (!decodeAndCheckVarint(input)) {
return false;
}
if (uncompressedLength() && *uncompressedLength() != size) {
throw std::runtime_error("LZMA2StreamCodec: invalid uncompressed length");
}
needDecodeSize_ = false;
}
......
......@@ -131,9 +131,6 @@ class Codec {
* Compress data, returning an IOBuf (which may share storage with data).
* Throws std::invalid_argument if data is larger than
* maxUncompressedLength().
*
* Regardless of the behavior of the underlying compressor, compressing
* an empty IOBuf chain will return an empty IOBuf chain.
*/
std::unique_ptr<IOBuf> compress(const folly::IOBuf* data);
......
......@@ -176,7 +176,10 @@ bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
uint64_t ZlibStreamCodec::doMaxCompressedLength(
uint64_t uncompressedLength) const {
return deflateBound(nullptr, uncompressedLength);
// When passed a nullptr, deflateBound() adds 6 bytes for a zlib wrapper. A
// gzip wrapper is 18 bytes, so we add the 12 byte difference.
return deflateBound(nullptr, uncompressedLength) +
(options_.format == Options::Format::GZIP ? 12 : 0);
}
std::unique_ptr<Codec> ZlibStreamCodec::createCodec(
......
......@@ -205,7 +205,10 @@ class CompressionTest
protected:
void SetUp() override {
auto tup = GetParam();
uncompressedLength_ = uint64_t(1) << std::tr1::get<0>(tup);
int lengthLog = std::tr1::get<0>(tup);
// Small hack to test empty data
uncompressedLength_ =
(lengthLog < 0) ? 0 : uint64_t(1) << std::tr1::get<0>(tup);
chunks_ = std::tr1::get<1>(tup);
codec_ = getCodec(std::tr1::get<2>(tup));
}
......@@ -225,6 +228,9 @@ class CompressionTest
void CompressionTest::runSimpleIOBufTest(const DataHolder& dh) {
const auto original = split(IOBuf::wrapBuffer(dh.data(uncompressedLength_)));
const auto compressed = split(codec_->compress(original.get()));
EXPECT_LE(
compressed->computeChainDataLength(),
codec_->maxCompressedLength(uncompressedLength_));
if (!codec_->needsUncompressedLength()) {
auto uncompressed = codec_->uncompress(compressed.get());
EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
......@@ -243,6 +249,9 @@ void CompressionTest::runSimpleStringTest(const DataHolder& dh) {
reinterpret_cast<const char*>(dh.data(uncompressedLength_).data()),
uncompressedLength_);
const auto compressed = codec_->compress(original);
EXPECT_LE(
compressed.length(), codec_->maxCompressedLength(uncompressedLength_));
if (!codec_->needsUncompressedLength()) {
auto uncompressed = codec_->uncompress(compressed);
EXPECT_EQ(uncompressedLength_, uncompressed.length());
......@@ -301,7 +310,7 @@ INSTANTIATE_TEST_CASE_P(
CompressionTest,
CompressionTest,
testing::Combine(
testing::Values(0, 1, 12, 22, 25, 27),
testing::Values(-1, 0, 1, 12, 22, 25, 27),
testing::Values(1, 2, 3, 8, 65),
testing::ValuesIn(availableCodecs())));
......@@ -484,7 +493,6 @@ TEST(StreamingUnitTest, needsDataLength) {
}
TEST_P(StreamingUnitTest, maxCompressedLength) {
EXPECT_EQ(0, codec_->maxCompressedLength(0));
for (uint64_t const length : {1, 10, 100, 1000, 10000, 100000, 1000000}) {
EXPECT_GE(codec_->maxCompressedLength(length), length);
}
......@@ -494,11 +502,11 @@ TEST_P(StreamingUnitTest, getUncompressedLength) {
auto const empty = IOBuf::create(0);
EXPECT_EQ(uint64_t(0), codec_->getUncompressedLength(empty.get()));
EXPECT_EQ(uint64_t(0), codec_->getUncompressedLength(empty.get(), 0));
EXPECT_ANY_THROW(codec_->getUncompressedLength(empty.get(), 1));
auto const data = IOBuf::wrapBuffer(randomDataHolder.data(100));
auto const compressed = codec_->compress(data.get());
EXPECT_ANY_THROW(codec_->getUncompressedLength(data.get(), 0));
if (auto const length = codec_->getUncompressedLength(data.get())) {
EXPECT_EQ(100, *length);
}
......@@ -512,35 +520,46 @@ TEST_P(StreamingUnitTest, getUncompressedLength) {
TEST_P(StreamingUnitTest, emptyData) {
ByteRange input{};
auto buffer = IOBuf::create(1);
auto buffer = IOBuf::create(codec_->maxCompressedLength(0));
buffer->append(buffer->capacity());
MutableByteRange output{};
MutableByteRange output;
// Test compressing empty data in one pass
if (!codec_->needsDataLength()) {
output = {buffer->writableData(), buffer->length()};
EXPECT_TRUE(
codec_->compressStream(input, output, StreamCodec::FlushOp::END));
}
codec_->resetStream(0);
EXPECT_TRUE(codec_->compressStream(input, output, StreamCodec::FlushOp::END));
output = {buffer->writableData(), buffer->length()};
EXPECT_TRUE(codec_->compressStream(input, output, StreamCodec::FlushOp::END));
EXPECT_EQ(buffer->length(), output.size());
// Test uncompressing the compressed empty data is equivalent to the empty
// string
{
size_t compressedSize = buffer->length() - output.size();
auto const compressed =
IOBuf::copyBuffer(buffer->writableData(), compressedSize);
auto inputRange = compressed->coalesce();
codec_->resetStream(0);
output = {buffer->writableData(), buffer->length()};
EXPECT_TRUE(codec_->uncompressStream(
inputRange, output, StreamCodec::FlushOp::END));
EXPECT_EQ(output.size(), buffer->length());
}
// Test compressing empty data with multiple calls to compressStream()
codec_->resetStream(0);
output = {};
EXPECT_FALSE(codec_->compressStream(input, output));
EXPECT_TRUE(
codec_->compressStream(input, output, StreamCodec::FlushOp::FLUSH));
EXPECT_TRUE(codec_->compressStream(input, output, StreamCodec::FlushOp::END));
codec_->resetStream(0);
output = {buffer->writableData(), buffer->length()};
EXPECT_FALSE(codec_->compressStream(input, output));
EXPECT_TRUE(
codec_->compressStream(input, output, StreamCodec::FlushOp::FLUSH));
EXPECT_TRUE(codec_->compressStream(input, output, StreamCodec::FlushOp::END));
EXPECT_EQ(buffer->length(), output.size());
{
auto largeBuffer = IOBuf::create(codec_->maxCompressedLength(0) * 2);
largeBuffer->append(largeBuffer->capacity());
codec_->resetStream(0);
output = {largeBuffer->writableData(), largeBuffer->length()};
EXPECT_FALSE(codec_->compressStream(input, output));
EXPECT_TRUE(
codec_->compressStream(input, output, StreamCodec::FlushOp::FLUSH));
EXPECT_TRUE(
codec_->compressStream(input, output, StreamCodec::FlushOp::END));
}
// Test uncompressing empty data
output = {};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment