Commit 6fac16c6 authored by Nick Terrell's avatar Nick Terrell Committed by Facebook Github Bot

Allow decompression without uncompressedSize

Summary:
The `LZMA2` codec doesn't allow decompression without passing the uncompressed size.
Every other codec that supports streaming decompression can decompress without the uncompressed size by streaming into an IOBuf chain.
Benchmarks show that reducing the `defaultBufferLength` to 256 KiB doesn't slow down decompression of large files, but it speeds up decompression of small files (< 1000 bytes).

Reviewed By: yfeldblum

Differential Revision: D4751571

fbshipit-source-id: 39dbe6754a1ecdc2b7ba3107e9face926d4c98ca
parent 06598425
......@@ -973,7 +973,7 @@ LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) {
}
bool LZMA2Codec::doNeedsUncompressedLength() const {
return !encodeSize();
return false;
}
uint64_t LZMA2Codec::doMaxUncompressedLength() const {
......@@ -1104,27 +1104,25 @@ std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
SCOPE_EXIT { lzma_end(&stream); };
// Max 64MiB in one go
constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
constexpr uint32_t defaultBufferLength = uint32_t(4) << 20; // 4MiB
constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
constexpr uint32_t defaultBufferLength = uint32_t(256) << 10; // 256 KiB
folly::io::Cursor cursor(data);
uint64_t actualUncompressedLength;
if (encodeSize()) {
actualUncompressedLength = decodeVarintFromCursor(cursor);
const uint64_t actualUncompressedLength = decodeVarintFromCursor(cursor);
if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
uncompressedLength != actualUncompressedLength) {
throw std::runtime_error("LZMA2Codec: invalid uncompressed length");
}
} else {
actualUncompressedLength = uncompressedLength;
DCHECK_NE(actualUncompressedLength, UNKNOWN_UNCOMPRESSED_LENGTH);
uncompressedLength = actualUncompressedLength;
}
auto out = addOutputBuffer(
&stream,
(actualUncompressedLength <= maxSingleStepLength ?
actualUncompressedLength :
defaultBufferLength));
((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
uncompressedLength <= maxSingleStepLength)
? uncompressedLength
: defaultBufferLength));
bool streamEnd = false;
auto buf = cursor.peekBytes();
......@@ -1151,9 +1149,10 @@ std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
out->prev()->trimEnd(stream.avail_out);
if (actualUncompressedLength != stream.total_out) {
throw std::runtime_error(to<std::string>(
"LZMA2Codec: invalid uncompressed length"));
if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
uncompressedLength != stream.total_out) {
throw std::runtime_error(
to<std::string>("LZMA2Codec: invalid uncompressed length"));
}
return out;
......
......@@ -155,7 +155,7 @@ TEST(CompressionTestNeedsUncompressedLength, Simple) {
{ CodecType::SNAPPY, false },
{ CodecType::ZLIB, false },
{ CodecType::LZ4_VARINT_SIZE, false },
{ CodecType::LZMA2, true },
{ CodecType::LZMA2, false },
{ CodecType::LZMA2_VARINT_SIZE, false },
{ CodecType::ZSTD, false },
{ CodecType::GZIP, false },
......@@ -392,6 +392,7 @@ INSTANTIATE_TEST_CASE_P(
supportedCodecs({
CodecType::SNAPPY,
CodecType::ZLIB,
CodecType::LZMA2,
CodecType::ZSTD,
CodecType::LZ4_FRAME,
})));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment