Commit 2f002209 authored by Nick Terrell, committed by Facebook Github Bot

Add BZIP2 stream codec

Summary:
Add the BZIP2 stream codec.

`FlushOp::FLUSH` does not guarantee that the decompressor can read all the input processed so far, due to a bug in the bzip2 library. This is unlikely to matter in practice, since `FLUSH` is not a common operation, especially with bzip2.

Reviewed By: yfeldblum

Differential Revision: D9484325

fbshipit-source-id: 40770b6f301a16d86c4de8c2b0875f931f00cba2
parent 8cbbffc3
This diff is collapsed.
...@@ -63,6 +63,7 @@ enum class CodecType { ...@@ -63,6 +63,7 @@ enum class CodecType {
/** /**
* Use zlib compression. * Use zlib compression.
* Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
* Streaming compression is supported.
*/ */
ZLIB = 4, ZLIB = 4,
...@@ -74,6 +75,7 @@ enum class CodecType { ...@@ -74,6 +75,7 @@ enum class CodecType {
/** /**
* Use LZMA2 compression. * Use LZMA2 compression.
* Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
* Streaming compression is supported.
*/ */
LZMA2 = 6, LZMA2 = 6,
LZMA2_VARINT_SIZE = 7, LZMA2_VARINT_SIZE = 7,
...@@ -82,6 +84,7 @@ enum class CodecType { ...@@ -82,6 +84,7 @@ enum class CodecType {
* Use ZSTD compression. * Use ZSTD compression.
* Levels supported: 1 = fast, ..., 19 = best; default = 3 * Levels supported: 1 = fast, ..., 19 = best; default = 3
* Use ZSTD_FAST for the fastest zstd compression (negative levels). * Use ZSTD_FAST for the fastest zstd compression (negative levels).
* Streaming compression is supported.
*/ */
ZSTD = 8, ZSTD = 8,
...@@ -89,6 +92,7 @@ enum class CodecType { ...@@ -89,6 +92,7 @@ enum class CodecType {
* Use gzip compression. This is the same compression algorithm as ZLIB but * Use gzip compression. This is the same compression algorithm as ZLIB but
* gzip-compressed files tend to be easier to work with from the command line. * gzip-compressed files tend to be easier to work with from the command line.
* Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
* Streaming compression is supported.
*/ */
GZIP = 9, GZIP = 9,
...@@ -101,6 +105,9 @@ enum class CodecType { ...@@ -101,6 +105,9 @@ enum class CodecType {
/** /**
* Use bzip2 compression. * Use bzip2 compression.
* Levels supported: 1 = fast, 9 = best; default = 9 * Levels supported: 1 = fast, 9 = best; default = 9
* Streaming compression is supported BUT FlushOp::FLUSH does NOT ensure that
* the decompressor can read all the data up to that point, due to a bug in
* the bzip2 library.
*/ */
BZIP2 = 11, BZIP2 = 11,
...@@ -114,6 +121,7 @@ enum class CodecType { ...@@ -114,6 +121,7 @@ enum class CodecType {
* speed is around 25% faster than ZSTD. * speed is around 25% faster than ZSTD.
* This codec is fully compatible with ZSTD. * This codec is fully compatible with ZSTD.
* Levels supported: 1 = best, ..., 5 = fast; default = 1 * Levels supported: 1 = best, ..., 5 = fast; default = 1
* Streaming compression is supported.
*/ */
ZSTD_FAST = 12, ZSTD_FAST = 12,
......
...@@ -462,12 +462,20 @@ INSTANTIATE_TEST_CASE_P( ...@@ -462,12 +462,20 @@ INSTANTIATE_TEST_CASE_P(
CodecType::BZIP2, CodecType::BZIP2,
}))); })));
/**
 * Reports whether the streaming tests should exercise FlushOp::FLUSH for the
 * given codec.
 *
 * BZIP2 is the only codec excluded: its FLUSH does not ensure that the
 * decompressor can read all the data up to that point, due to a bug in the
 * bzip2 library.
 */
static bool codecHasFlush(CodecType type) {
  const bool isBzip2 = (type == CodecType::BZIP2);
  return !isBzip2;
}
class StreamingUnitTest : public testing::TestWithParam<CodecType> { class StreamingUnitTest : public testing::TestWithParam<CodecType> {
protected: protected:
void SetUp() override { void SetUp() override {
codec_ = getStreamCodec(GetParam()); codec_ = getStreamCodec(GetParam());
} }
bool hasFlush() const {
return codecHasFlush(GetParam());
}
std::unique_ptr<StreamCodec> codec_; std::unique_ptr<StreamCodec> codec_;
}; };
...@@ -554,8 +562,10 @@ TEST_P(StreamingUnitTest, emptyData) { ...@@ -554,8 +562,10 @@ TEST_P(StreamingUnitTest, emptyData) {
codec_->resetStream(0); codec_->resetStream(0);
output = {largeBuffer->writableData(), largeBuffer->length()}; output = {largeBuffer->writableData(), largeBuffer->length()};
EXPECT_FALSE(codec_->compressStream(input, output)); EXPECT_FALSE(codec_->compressStream(input, output));
EXPECT_TRUE( if (hasFlush()) {
codec_->compressStream(input, output, StreamCodec::FlushOp::FLUSH)); EXPECT_TRUE(
codec_->compressStream(input, output, StreamCodec::FlushOp::FLUSH));
}
EXPECT_TRUE( EXPECT_TRUE(
codec_->compressStream(input, output, StreamCodec::FlushOp::END)); codec_->compressStream(input, output, StreamCodec::FlushOp::END));
} }
...@@ -564,17 +574,21 @@ TEST_P(StreamingUnitTest, emptyData) { ...@@ -564,17 +574,21 @@ TEST_P(StreamingUnitTest, emptyData) {
output = {}; output = {};
codec_->resetStream(); codec_->resetStream();
EXPECT_TRUE(codec_->uncompressStream(input, output)); EXPECT_TRUE(codec_->uncompressStream(input, output));
codec_->resetStream(); if (hasFlush()) {
EXPECT_TRUE( codec_->resetStream();
codec_->uncompressStream(input, output, StreamCodec::FlushOp::FLUSH)); EXPECT_TRUE(
codec_->uncompressStream(input, output, StreamCodec::FlushOp::FLUSH));
}
codec_->resetStream(); codec_->resetStream();
EXPECT_TRUE( EXPECT_TRUE(
codec_->uncompressStream(input, output, StreamCodec::FlushOp::END)); codec_->uncompressStream(input, output, StreamCodec::FlushOp::END));
codec_->resetStream(0); codec_->resetStream(0);
EXPECT_TRUE(codec_->uncompressStream(input, output)); EXPECT_TRUE(codec_->uncompressStream(input, output));
codec_->resetStream(0); if (hasFlush()) {
EXPECT_TRUE( codec_->resetStream(0);
codec_->uncompressStream(input, output, StreamCodec::FlushOp::FLUSH)); EXPECT_TRUE(
codec_->uncompressStream(input, output, StreamCodec::FlushOp::FLUSH));
}
codec_->resetStream(0); codec_->resetStream(0);
EXPECT_TRUE( EXPECT_TRUE(
codec_->uncompressStream(input, output, StreamCodec::FlushOp::END)); codec_->uncompressStream(input, output, StreamCodec::FlushOp::END));
...@@ -600,6 +614,9 @@ TEST_P(StreamingUnitTest, noForwardProgress) { ...@@ -600,6 +614,9 @@ TEST_P(StreamingUnitTest, noForwardProgress) {
// No progress is not okay twice in a row for all flush operations when // No progress is not okay twice in a row for all flush operations when
// compressing // compressing
for (const auto flushOp : flushOps) { for (const auto flushOp : flushOps) {
if (flushOp == StreamCodec::FlushOp::FLUSH && !hasFlush()) {
continue;
}
if (codec_->needsDataLength()) { if (codec_->needsDataLength()) {
codec_->resetStream(inBuffer->computeChainDataLength()); codec_->resetStream(inBuffer->computeChainDataLength());
} else { } else {
...@@ -621,6 +638,9 @@ TEST_P(StreamingUnitTest, noForwardProgress) { ...@@ -621,6 +638,9 @@ TEST_P(StreamingUnitTest, noForwardProgress) {
// No progress is not okay twice in a row for all flush operations when // No progress is not okay twice in a row for all flush operations when
// uncompressing // uncompressing
for (const auto flushOp : flushOps) { for (const auto flushOp : flushOps) {
if (flushOp == StreamCodec::FlushOp::FLUSH && !hasFlush()) {
continue;
}
codec_->resetStream(); codec_->resetStream();
auto input = compressed->coalesce(); auto input = compressed->coalesce();
// Remove the last byte so the operation is incomplete // Remove the last byte so the operation is incomplete
...@@ -681,35 +701,43 @@ TEST_P(StreamingUnitTest, stateTransitions) { ...@@ -681,35 +701,43 @@ TEST_P(StreamingUnitTest, stateTransitions) {
codec_->resetStream(); codec_->resetStream();
EXPECT_FALSE(compress()); EXPECT_FALSE(compress());
EXPECT_FALSE(compress()); EXPECT_FALSE(compress());
EXPECT_TRUE(compress(StreamCodec::FlushOp::FLUSH)); if (hasFlush()) {
EXPECT_TRUE(compress(StreamCodec::FlushOp::FLUSH));
}
EXPECT_FALSE(compress()); EXPECT_FALSE(compress());
EXPECT_TRUE(compress(StreamCodec::FlushOp::END)); EXPECT_TRUE(compress(StreamCodec::FlushOp::END));
} }
codec_->resetStream(in.size() * 5); codec_->resetStream(in.size() * 5);
compress_all(false); compress_all(false);
compress_all(false); compress_all(false);
compress_all(true, StreamCodec::FlushOp::FLUSH); if (hasFlush()) {
compress_all(true, StreamCodec::FlushOp::FLUSH);
}
compress_all(false); compress_all(false);
compress_all(true, StreamCodec::FlushOp::END); compress_all(true, StreamCodec::FlushOp::END);
// uncompression flow // uncompression flow
codec_->resetStream(); codec_->resetStream();
EXPECT_FALSE(uncompress(StreamCodec::FlushOp::NONE, true)); EXPECT_FALSE(uncompress(StreamCodec::FlushOp::NONE, true));
codec_->resetStream(); if (hasFlush()) {
EXPECT_FALSE(uncompress(StreamCodec::FlushOp::FLUSH, true)); codec_->resetStream();
EXPECT_FALSE(uncompress(StreamCodec::FlushOp::FLUSH, true));
}
codec_->resetStream(); codec_->resetStream();
EXPECT_FALSE(uncompress(StreamCodec::FlushOp::NONE, true)); EXPECT_FALSE(uncompress(StreamCodec::FlushOp::NONE, true));
codec_->resetStream(); codec_->resetStream();
EXPECT_FALSE(uncompress(StreamCodec::FlushOp::NONE, true)); EXPECT_FALSE(uncompress(StreamCodec::FlushOp::NONE, true));
codec_->resetStream(); if (hasFlush()) {
EXPECT_TRUE(uncompress(StreamCodec::FlushOp::FLUSH)); codec_->resetStream();
EXPECT_TRUE(uncompress(StreamCodec::FlushOp::FLUSH));
}
// compress -> uncompress // compress -> uncompress
codec_->resetStream(in.size()); codec_->resetStream(in.size());
EXPECT_FALSE(compress()); EXPECT_FALSE(compress());
EXPECT_THROW(uncompress(), std::logic_error); EXPECT_THROW(uncompress(), std::logic_error);
// uncompress -> compress // uncompress -> compress
codec_->resetStream(inBuffer->computeChainDataLength()); codec_->resetStream(inBuffer->computeChainDataLength());
EXPECT_TRUE(uncompress(StreamCodec::FlushOp::FLUSH)); EXPECT_TRUE(uncompress(StreamCodec::FlushOp::NONE));
EXPECT_THROW(compress(), std::logic_error); EXPECT_THROW(compress(), std::logic_error);
// end -> compress // end -> compress
if (!codec_->needsDataLength()) { if (!codec_->needsDataLength()) {
...@@ -724,16 +752,20 @@ TEST_P(StreamingUnitTest, stateTransitions) { ...@@ -724,16 +752,20 @@ TEST_P(StreamingUnitTest, stateTransitions) {
EXPECT_THROW(compress(), std::logic_error); EXPECT_THROW(compress(), std::logic_error);
// end -> uncompress // end -> uncompress
codec_->resetStream(); codec_->resetStream();
EXPECT_TRUE(uncompress(StreamCodec::FlushOp::FLUSH)); EXPECT_TRUE(uncompress(StreamCodec::FlushOp::END));
EXPECT_THROW(uncompress(), std::logic_error); EXPECT_THROW(uncompress(), std::logic_error);
// flush -> compress // flush -> compress
codec_->resetStream(in.size()); if (hasFlush()) {
EXPECT_FALSE(compress(StreamCodec::FlushOp::FLUSH, true)); codec_->resetStream(in.size());
EXPECT_THROW(compress(), std::logic_error); EXPECT_FALSE(compress(StreamCodec::FlushOp::FLUSH, true));
EXPECT_THROW(compress(), std::logic_error);
}
// flush -> end // flush -> end
codec_->resetStream(in.size()); if (hasFlush()) {
EXPECT_FALSE(compress(StreamCodec::FlushOp::FLUSH, true)); codec_->resetStream(in.size());
EXPECT_THROW(compress(StreamCodec::FlushOp::END), std::logic_error); EXPECT_FALSE(compress(StreamCodec::FlushOp::FLUSH, true));
EXPECT_THROW(compress(StreamCodec::FlushOp::END), std::logic_error);
}
// undefined -> compress // undefined -> compress
codec_->compress(inBuffer.get()); codec_->compress(inBuffer.get());
EXPECT_THROW(compress(), std::logic_error); EXPECT_THROW(compress(), std::logic_error);
...@@ -752,6 +784,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -752,6 +784,10 @@ INSTANTIATE_TEST_CASE_P(
class StreamingCompressionTest class StreamingCompressionTest
: public testing::TestWithParam<std::tuple<int, int, CodecType>> { : public testing::TestWithParam<std::tuple<int, int, CodecType>> {
protected: protected:
// True when the codec in this test's parameter tuple (element 2) passes
// codecHasFlush(); tests use it to skip FlushOp::FLUSH cases.
bool hasFlush() const {
  const auto& params = GetParam();
  return codecHasFlush(std::get<2>(params));
}
void SetUp() override { void SetUp() override {
auto const tup = GetParam(); auto const tup = GetParam();
uncompressedLength_ = uint64_t(1) << std::get<0>(tup); uncompressedLength_ = uint64_t(1) << std::get<0>(tup);
...@@ -874,6 +910,8 @@ TEST_P(StreamingCompressionTest, compressStream) { ...@@ -874,6 +910,8 @@ TEST_P(StreamingCompressionTest, compressStream) {
void StreamingCompressionTest::runUncompressStreamTest( void StreamingCompressionTest::runUncompressStreamTest(
const folly::io::test::DataHolder& dh) { const folly::io::test::DataHolder& dh) {
const auto flush =
hasFlush() ? StreamCodec::FlushOp::FLUSH : StreamCodec::FlushOp::NONE;
auto const data = IOBuf::wrapBuffer(dh.data(uncompressedLength_)); auto const data = IOBuf::wrapBuffer(dh.data(uncompressedLength_));
// Concatenate 3 compressed frames in a row // Concatenate 3 compressed frames in a row
auto compressed = codec_->compress(data.get()); auto compressed = codec_->compress(data.get());
...@@ -884,8 +922,7 @@ void StreamingCompressionTest::runUncompressStreamTest( ...@@ -884,8 +922,7 @@ void StreamingCompressionTest::runUncompressStreamTest(
// Uncompress the first frame // Uncompress the first frame
codec_->resetStream(data->computeChainDataLength()); codec_->resetStream(data->computeChainDataLength());
{ {
auto const result = uncompressSome( auto const result = uncompressSome(codec_.get(), input, chunkSize_, flush);
codec_.get(), input, chunkSize_, StreamCodec::FlushOp::FLUSH);
ASSERT_TRUE(result.first); ASSERT_TRUE(result.first);
ASSERT_EQ(hashIOBuf(data.get()), hashIOBuf(result.second.get())); ASSERT_EQ(hashIOBuf(data.get()), hashIOBuf(result.second.get()));
} }
...@@ -900,8 +937,7 @@ void StreamingCompressionTest::runUncompressStreamTest( ...@@ -900,8 +937,7 @@ void StreamingCompressionTest::runUncompressStreamTest(
// Uncompress the third frame // Uncompress the third frame
codec_->resetStream(); codec_->resetStream();
{ {
auto const result = uncompressSome( auto const result = uncompressSome(codec_.get(), input, chunkSize_, flush);
codec_.get(), input, chunkSize_, StreamCodec::FlushOp::FLUSH);
ASSERT_TRUE(result.first); ASSERT_TRUE(result.first);
ASSERT_EQ(hashIOBuf(data.get()), hashIOBuf(result.second.get())); ASSERT_EQ(hashIOBuf(data.get()), hashIOBuf(result.second.get()));
} }
...@@ -945,6 +981,9 @@ void StreamingCompressionTest::runFlushTest(DataHolder const& dh) { ...@@ -945,6 +981,9 @@ void StreamingCompressionTest::runFlushTest(DataHolder const& dh) {
} }
TEST_P(StreamingCompressionTest, testFlush) { TEST_P(StreamingCompressionTest, testFlush) {
if (!hasFlush()) {
return;
}
runFlushTest(constantDataHolder); runFlushTest(constantDataHolder);
runFlushTest(randomDataHolder); runFlushTest(randomDataHolder);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment