Commit f05cdbc1 authored by Lovro Puzar, committed by facebook-github-bot-4

Support gzip in folly/io:compression

Summary: gzip files can be easier to work with operationally than raw zlib streams, thanks to the gzip and gunzip command-line tools. zlib supports adding/stripping the gzip header; expose that through a new CodecType (GZIP).

Reviewed By: chipturner

Differential Revision: D2759554

fb-gh-sync-id: 67e0f5f88e087d363db1c349cabb909fb3f00417
parent fbb8a926
......@@ -469,7 +469,7 @@ std::unique_ptr<Codec> ZlibCodec::create(int level, CodecType type) {
}
ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) {
DCHECK(type == CodecType::ZLIB);
DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP);
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
level = 1;
......@@ -534,7 +534,22 @@ std::unique_ptr<IOBuf> ZlibCodec::doCompress(const IOBuf* data) {
stream.zfree = nullptr;
stream.opaque = nullptr;
int rc = deflateInit(&stream, level_);
// Using deflateInit2() to support gzip. "The windowBits parameter is the
// base two logarithm of the maximum window size (...) The default value is
// 15 (...) Add 16 to windowBits to write a simple gzip header and trailer
// around the compressed data instead of a zlib wrapper. The gzip header
// will have no file name, no extra data, no comment, no modification time
// (set to zero), no header crc, and the operating system will be set to 255
// (unknown)."
int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
// All other parameters (method, memLevel, strategy) get default values from
// the zlib manual.
int rc = deflateInit2(&stream,
level_,
Z_DEFLATED,
windowBits,
/* memLevel */ 8,
Z_DEFAULT_STRATEGY);
if (rc != Z_OK) {
throw std::runtime_error(to<std::string>(
"ZlibCodec: deflateInit error: ", rc, ": ", stream.msg));
......@@ -614,7 +629,11 @@ std::unique_ptr<IOBuf> ZlibCodec::doUncompress(const IOBuf* data,
stream.zfree = nullptr;
stream.opaque = nullptr;
int rc = inflateInit(&stream);
// "The windowBits parameter is the base two logarithm of the maximum window
// size (...) The default value is 15 (...) add 16 to decode only the gzip
// format (the zlib format will return a Z_DATA_ERROR)."
int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
int rc = inflateInit2(&stream, windowBits);
if (rc != Z_OK) {
throw std::runtime_error(to<std::string>(
"ZlibCodec: inflateInit error: ", rc, ": ", stream.msg));
......@@ -1063,6 +1082,12 @@ std::unique_ptr<Codec> getCodec(CodecType type, int level) {
#else
nullptr,
#endif
#if FOLLY_HAVE_LIBZ
ZlibCodec::create,
#else
nullptr,
#endif
};
size_t idx = static_cast<size_t>(type);
......
......@@ -80,7 +80,14 @@ enum class CodecType {
*/
ZSTD_BETA = 8,
NUM_CODEC_TYPES = 9,
/**
* Use gzip compression. This is the same compression algorithm as ZLIB but
* gzip-compressed files tend to be easier to work with from the command line.
* Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
*/
GZIP = 9,
NUM_CODEC_TYPES = 10,
};
class Codec {
......
......@@ -129,6 +129,7 @@ TEST(CompressionTestNeedsUncompressedLength, Simple) {
EXPECT_FALSE(getCodec(CodecType::LZMA2_VARINT_SIZE)
->needsUncompressedLength());
EXPECT_TRUE(getCodec(CodecType::ZSTD_BETA)->needsUncompressedLength());
EXPECT_FALSE(getCodec(CodecType::GZIP)->needsUncompressedLength());
}
class CompressionTest
......@@ -182,7 +183,8 @@ INSTANTIATE_TEST_CASE_P(
CodecType::LZ4_VARINT_SIZE,
CodecType::LZMA2,
CodecType::LZMA2_VARINT_SIZE,
CodecType::ZSTD_BETA)));
CodecType::ZSTD_BETA,
CodecType::GZIP)));
class CompressionVarintTest
: public testing::TestWithParam<std::tr1::tuple<int, CodecType>> {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment