Commit 2ca5653c, authored by Stella Lau and committed by Facebook Github Bot

Add zlib-specific codec initialization

Summary:
- Create interface to initialize zlib codec using specific parameters
- This enables the raw inflate/deflate and auto inflate options
- Add tests for option initialization

Reviewed By: terrelln, yfeldblum

Differential Revision: D5649980

fbshipit-source-id: fd36e8edc0e8c528cd6c9d8f39e8ef839b6acfef
parent f372f154
......@@ -298,6 +298,8 @@ nobase_follyinclude_HEADERS = \
io/async/test/TimeUtil.h \
io/async/test/UndelayedDestruction.h \
io/async/test/Util.h \
io/compression/Utils.h \
io/compression/Zlib.h \
Iterator.h \
json.h \
Launder.h \
......@@ -533,6 +535,7 @@ libfolly_la_SOURCES = \
io/async/test/TimeUtil.cpp \
io/async/ssl/OpenSSLUtils.cpp \
io/async/ssl/SSLErrors.cpp \
io/compression/Zlib.cpp \
json.cpp \
detail/MemoryIdler.cpp \
detail/SocketFastOpen.cpp \
......
......@@ -32,7 +32,7 @@
#endif
#if FOLLY_HAVE_LIBZ
#include <zlib.h>
#include <folly/io/compression/Zlib.h>
#endif
#if FOLLY_HAVE_LIBLZMA
......@@ -55,9 +55,15 @@
#include <folly/ScopeGuard.h>
#include <folly/Varint.h>
#include <folly/io/Cursor.h>
#include <folly/io/compression/Utils.h>
#include <algorithm>
#include <unordered_set>
using folly::io::compression::detail::dataStartsWithLE;
using folly::io::compression::detail::prefixToStringLE;
namespace zlib = folly::io::zlib;
namespace folly {
namespace io {
......@@ -517,39 +523,6 @@ inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
#endif // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA
namespace {
/**
* Reads sizeof(T) bytes, and returns false if not enough bytes are available.
* Returns true if the first n bytes are equal to prefix when interpreted as
* a little endian T.
*/
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, bool>::type
dataStartsWithLE(const IOBuf* data, T prefix, uint64_t n = sizeof(T)) {
DCHECK_GT(n, 0);
DCHECK_LE(n, sizeof(T));
T value;
Cursor cursor{data};
if (!cursor.tryReadLE(value)) {
return false;
}
const T mask = n == sizeof(T) ? T(-1) : (T(1) << (8 * n)) - 1;
return prefix == (value & mask);
}
template <typename T>
typename std::enable_if<std::is_arithmetic<T>::value, std::string>::type
prefixToStringLE(T prefix, uint64_t n = sizeof(T)) {
DCHECK_GT(n, 0);
DCHECK_LE(n, sizeof(T));
prefix = Endian::little(prefix);
std::string result;
result.resize(n);
memcpy(&result[0], &prefix, n);
return result;
}
} // namespace
#if FOLLY_HAVE_LIBLZ4
/**
......@@ -1005,300 +978,6 @@ std::unique_ptr<IOBuf> SnappyCodec::doUncompress(
#endif // FOLLY_HAVE_LIBSNAPPY
#if FOLLY_HAVE_LIBZ
/**
* Zlib codec
*/
class ZlibStreamCodec final : public StreamCodec {
public:
static std::unique_ptr<Codec> createCodec(int level, CodecType type);
static std::unique_ptr<StreamCodec> createStream(int level, CodecType type);
explicit ZlibStreamCodec(int level, CodecType type);
~ZlibStreamCodec() override;
std::vector<std::string> validPrefixes() const override;
bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
const override;
private:
uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
void doResetStream() override;
bool doCompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) override;
bool doUncompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) override;
void resetDeflateStream();
void resetInflateStream();
Optional<z_stream> deflateStream_{};
Optional<z_stream> inflateStream_{};
int level_;
bool needReset_{true};
};
static constexpr uint16_t kGZIPMagicLE = 0x8B1F;
std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
if (type() == CodecType::ZLIB) {
// Zlib streams start with a 2 byte header.
//
// 0 1
// +---+---+
// |CMF|FLG|
// +---+---+
//
// We won't restrict the values of any sub-fields except as described below.
//
// The lowest 4 bits of CMF is the compression method (CM).
// CM == 0x8 is the deflate compression method, which is currently the only
// supported compression method, so any valid prefix must have CM == 0x8.
//
// The lowest 5 bits of FLG is FCHECK.
// FCHECK must be such that the two header bytes are a multiple of 31 when
// interpreted as a big endian 16-bit number.
std::vector<std::string> result;
// 16 values for the first byte, 8 values for the second byte.
// There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
result.reserve(132);
// Select all values for the CMF byte that use the deflate algorithm 0x8.
for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
// Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
uint16_t prefix = first | second;
// Compute FCHECK.
prefix += 31 - (prefix % 31);
result.push_back(prefixToStringLE(Endian::big(prefix)));
// zlib won't produce this, but it is a valid prefix.
if ((prefix & 0x1F) == 31) {
prefix -= 31;
result.push_back(prefixToStringLE(Endian::big(prefix)));
}
}
}
return result;
} else {
// The gzip frame starts with 2 magic bytes.
return {prefixToStringLE(kGZIPMagicLE)};
}
}
bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
const {
if (type() == CodecType::ZLIB) {
uint16_t value;
Cursor cursor{data};
if (!cursor.tryReadBE(value)) {
return false;
}
// zlib compressed if using deflate and is a multiple of 31.
return (value & 0x0F00) == 0x0800 && value % 31 == 0;
} else {
return dataStartsWithLE(data, kGZIPMagicLE);
}
}
uint64_t ZlibStreamCodec::doMaxCompressedLength(
uint64_t uncompressedLength) const {
return deflateBound(nullptr, uncompressedLength);
}
std::unique_ptr<Codec> ZlibStreamCodec::createCodec(int level, CodecType type) {
return std::make_unique<ZlibStreamCodec>(level, type);
}
std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
int level,
CodecType type) {
return std::make_unique<ZlibStreamCodec>(level, type);
}
ZlibStreamCodec::ZlibStreamCodec(int level, CodecType type)
: StreamCodec(type) {
DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP);
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
level = 1;
break;
case COMPRESSION_LEVEL_DEFAULT:
level = Z_DEFAULT_COMPRESSION;
break;
case COMPRESSION_LEVEL_BEST:
level = 9;
break;
}
if (level != Z_DEFAULT_COMPRESSION && (level < 0 || level > 9)) {
throw std::invalid_argument(
to<std::string>("ZlibStreamCodec: invalid level: ", level));
}
level_ = level;
}
ZlibStreamCodec::~ZlibStreamCodec() {
if (deflateStream_) {
deflateEnd(deflateStream_.get_pointer());
deflateStream_.clear();
}
if (inflateStream_) {
inflateEnd(inflateStream_.get_pointer());
inflateStream_.clear();
}
}
void ZlibStreamCodec::doResetStream() {
needReset_ = true;
}
void ZlibStreamCodec::resetDeflateStream() {
if (deflateStream_) {
int const rc = deflateReset(deflateStream_.get_pointer());
if (rc != Z_OK) {
deflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
}
return;
}
deflateStream_ = z_stream{};
// Using deflateInit2() to support gzip. "The windowBits parameter is the
// base two logarithm of the maximum window size (...) The default value is
// 15 (...) Add 16 to windowBits to write a simple gzip header and trailer
// around the compressed data instead of a zlib wrapper. The gzip header
// will have no file name, no extra data, no comment, no modification time
// (set to zero), no header crc, and the operating system will be set to 255
// (unknown)."
int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
// All other parameters (method, memLevel, strategy) get default values from
// the zlib manual.
int const rc = deflateInit2(
deflateStream_.get_pointer(),
level_,
Z_DEFLATED,
windowBits,
/* memLevel */ 8,
Z_DEFAULT_STRATEGY);
if (rc != Z_OK) {
deflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
}
}
void ZlibStreamCodec::resetInflateStream() {
if (inflateStream_) {
int const rc = inflateReset(inflateStream_.get_pointer());
if (rc != Z_OK) {
inflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
}
return;
}
inflateStream_ = z_stream{};
// "The windowBits parameter is the base two logarithm of the maximum window
// size (...) The default value is 15 (...) add 16 to decode only the gzip
// format (the zlib format will return a Z_DATA_ERROR)."
int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
int const rc = inflateInit2(inflateStream_.get_pointer(), windowBits);
if (rc != Z_OK) {
inflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
}
}
static int zlibTranslateFlush(StreamCodec::FlushOp flush) {
switch (flush) {
case StreamCodec::FlushOp::NONE:
return Z_NO_FLUSH;
case StreamCodec::FlushOp::FLUSH:
return Z_SYNC_FLUSH;
case StreamCodec::FlushOp::END:
return Z_FINISH;
default:
throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
}
}
static int zlibThrowOnError(int rc) {
switch (rc) {
case Z_OK:
case Z_BUF_ERROR:
case Z_STREAM_END:
return rc;
default:
throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
}
}
bool ZlibStreamCodec::doCompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) {
if (needReset_) {
resetDeflateStream();
needReset_ = false;
}
DCHECK(deflateStream_.hasValue());
// zlib will return Z_STREAM_ERROR if output.data() is null.
if (output.data() == nullptr) {
return false;
}
deflateStream_->next_in = const_cast<uint8_t*>(input.data());
deflateStream_->avail_in = input.size();
deflateStream_->next_out = output.data();
deflateStream_->avail_out = output.size();
SCOPE_EXIT {
input.uncheckedAdvance(input.size() - deflateStream_->avail_in);
output.uncheckedAdvance(output.size() - deflateStream_->avail_out);
};
int const rc = zlibThrowOnError(
deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush)));
switch (flush) {
case StreamCodec::FlushOp::NONE:
return false;
case StreamCodec::FlushOp::FLUSH:
return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0;
case StreamCodec::FlushOp::END:
return rc == Z_STREAM_END;
default:
throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
}
}
bool ZlibStreamCodec::doUncompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) {
if (needReset_) {
resetInflateStream();
needReset_ = false;
}
DCHECK(inflateStream_.hasValue());
// zlib will return Z_STREAM_ERROR if output.data() is null.
if (output.data() == nullptr) {
return false;
}
inflateStream_->next_in = const_cast<uint8_t*>(input.data());
inflateStream_->avail_in = input.size();
inflateStream_->next_out = output.data();
inflateStream_->avail_out = output.size();
SCOPE_EXIT {
input.advance(input.size() - inflateStream_->avail_in);
output.advance(output.size() - inflateStream_->avail_out);
};
int const rc = zlibThrowOnError(
inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush)));
return rc == Z_STREAM_END;
}
#endif // FOLLY_HAVE_LIBZ
#if FOLLY_HAVE_LIBLZMA
/**
......@@ -2047,6 +1726,24 @@ std::unique_ptr<IOBuf> Bzip2Codec::doUncompress(
#endif // FOLLY_HAVE_LIBBZ2
#if FOLLY_HAVE_LIBZ
/**
 * Picks the default zlib::Options that correspond to a zlib-family codec type.
 *
 * Only CodecType::GZIP and CodecType::ZLIB are expected (checked with DCHECK);
 * anything that is not GZIP receives the zlib defaults.
 */
zlib::Options getZlibOptions(CodecType type) {
  DCHECK(type == CodecType::GZIP || type == CodecType::ZLIB);
  if (type == CodecType::GZIP) {
    return zlib::defaultGzipOptions();
  }
  return zlib::defaultZlibOptions();
}
/**
 * Factory entry for the ZLIB/GZIP codec types: resolves the default options
 * for `type` and asks the zlib module for a Codec at the given level.
 */
std::unique_ptr<Codec> getZlibCodec(int level, CodecType type) {
  auto const options = getZlibOptions(type);
  return zlib::getCodec(options, level);
}
/**
 * Streaming counterpart of getZlibCodec(): resolves the default options for
 * `type` and asks the zlib module for a StreamCodec at the given level.
 */
std::unique_ptr<StreamCodec> getZlibStreamCodec(int level, CodecType type) {
  auto const options = getZlibOptions(type);
  return zlib::getStreamCodec(options, level);
}
#endif // FOLLY_HAVE_LIBZ
/**
* Automatic decompression
*/
......@@ -2236,7 +1933,7 @@ constexpr Factory
#endif
#if FOLLY_HAVE_LIBZ
{ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream},
{getZlibCodec, getZlibStreamCodec},
#else
{},
#endif
......@@ -2262,7 +1959,7 @@ constexpr Factory
#endif
#if FOLLY_HAVE_LIBZ
{ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream},
{getZlibCodec, getZlibStreamCodec},
#else
{},
#endif
......
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <type_traits>
#include <folly/Bits.h>
#include <folly/io/Cursor.h>
#include <folly/io/IOBuf.h>
/**
* Helper functions for compression codecs.
*/
namespace folly {
namespace io {
namespace compression {
namespace detail {
/**
* Reads sizeof(T) bytes, and returns false if not enough bytes are available.
* Returns true if the first n bytes are equal to prefix when interpreted as
* a little endian T.
*/
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, bool>::type
dataStartsWithLE(const IOBuf* data, T prefix, uint64_t n = sizeof(T)) {
DCHECK_GT(n, 0);
DCHECK_LE(n, sizeof(T));
T value;
Cursor cursor{data};
if (!cursor.tryReadLE(value)) {
return false;
}
const T mask = n == sizeof(T) ? T(-1) : (T(1) << (8 * n)) - 1;
return prefix == (value & mask);
}
/**
 * Returns the first n bytes of `prefix`, laid out in little-endian order, as
 * a std::string. n must be in the range 1..sizeof(T).
 */
template <typename T>
typename std::enable_if<std::is_arithmetic<T>::value, std::string>::type
prefixToStringLE(T prefix, uint64_t n = sizeof(T)) {
  DCHECK_GT(n, 0);
  DCHECK_LE(n, sizeof(T));
  const T littleEndian = Endian::little(prefix);
  std::string bytes(n, '\0');
  memcpy(&bytes[0], &littleEndian, n);
  return bytes;
}
} // namespace detail
} // namespace compression
} // namespace io
} // namespace folly
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/io/compression/Zlib.h>
#if FOLLY_HAVE_LIBZ
#include <folly/Conv.h>
#include <folly/Optional.h>
#include <folly/Range.h>
#include <folly/ScopeGuard.h>
#include <folly/io/Compression.h>
#include <folly/io/Cursor.h>
#include <folly/io/compression/Utils.h>
using folly::io::compression::detail::dataStartsWithLE;
using folly::io::compression::detail::prefixToStringLE;
namespace folly {
namespace io {
namespace zlib {
namespace {
bool isValidStrategy(int strategy) {
std::array<int, 5> strategies{{
Z_DEFAULT_STRATEGY,
Z_FILTERED,
Z_HUFFMAN_ONLY,
Z_RLE,
Z_FIXED
}};
return std::any_of(strategies.begin(), strategies.end(), [&](int i) {
return i == strategy;
});
}
/**
 * Encodes the wrapper format into the windowBits value handed to zlib's
 * deflateInit2()/inflateInit2():
 *   ZLIB -> windowSize, GZIP -> windowSize + 16,
 *   RAW  -> -windowSize, AUTO -> windowSize + 32.
 * Any other format value is treated like ZLIB.
 */
int getWindowBits(Options::Format format, int windowSize) {
  if (format == Options::Format::GZIP) {
    return windowSize + 16;
  }
  if (format == Options::Format::RAW) {
    return -windowSize;
  }
  if (format == Options::Format::AUTO) {
    return windowSize + 32;
  }
  return windowSize;
}
/**
 * Determines which CodecType a codec built from `options` reports.
 *
 * Only the full 15-bit window with the ZLIB or GZIP format maps onto the
 * corresponding built-in type; every other combination is USER_DEFINED.
 */
CodecType getCodecType(Options options) {
  if (options.windowSize != 15) {
    return CodecType::USER_DEFINED;
  }
  switch (options.format) {
    case Options::Format::ZLIB:
      return CodecType::ZLIB;
    case Options::Format::GZIP:
      return CodecType::GZIP;
    default:
      return CodecType::USER_DEFINED;
  }
}
/**
 * StreamCodec implementation on top of zlib's deflate/inflate API, configured
 * by an Options value (format, windowSize, memLevel, strategy) and a
 * compression level.
 *
 * The z_stream objects are created lazily by resetDeflateStream() /
 * resetInflateStream() the first time they are needed and are released in
 * the destructor.
 */
class ZlibStreamCodec final : public StreamCodec {
public:
// Factory helpers: both construct a ZlibStreamCodec from the same arguments
// and differ only in the pointer type they return.
static std::unique_ptr<Codec> createCodec(Options options, int level);
static std::unique_ptr<StreamCodec> createStream(Options options, int level);
// Throws std::invalid_argument if the level or any option is out of range.
explicit ZlibStreamCodec(Options options, int level);
~ZlibStreamCodec() override;
// Header prefixes for the ZLIB and GZIP codec types; empty for any other type.
std::vector<std::string> validPrefixes() const override;
// Header sniffing for the ZLIB and GZIP codec types; false for any other type.
bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
const override;
private:
uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
void doResetStream() override;
bool doCompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) override;
bool doUncompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) override;
// Reset the existing z_stream, or create and initialize it on first use.
void resetDeflateStream();
void resetInflateStream();
// Validated copy of the options supplied at construction.
Options options_;
// Empty until the first compress / uncompress call needs them.
Optional<z_stream> deflateStream_{};
Optional<z_stream> inflateStream_{};
// zlib level (0..9 or Z_DEFAULT_COMPRESSION) after translating folly's
// symbolic COMPRESSION_LEVEL_* values.
int level_;
// Set by doResetStream(); cleared once a stream has been (re)initialized.
bool needReset_{true};
};
static constexpr uint16_t kGZIPMagicLE = 0x8B1F;
/**
 * Returns every byte prefix a stream of this codec's type may start with.
 *
 * - CodecType::ZLIB: all valid 2-byte zlib headers (CMF, FLG).
 * - CodecType::GZIP: the 2-byte gzip magic.
 * - Any other type (USER_DEFINED): no prefixes.
 */
std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
  if (type() == CodecType::ZLIB) {
    // Zlib streams start with a 2 byte header.
    //
    //   0   1
    // +---+---+
    // |CMF|FLG|
    // +---+---+
    //
    // We won't restrict the values of any sub-fields except as described below.
    //
    // The lowest 4 bits of CMF is the compression method (CM).
    // CM == 0x8 is the deflate compression method, which is currently the only
    // supported compression method, so any valid prefix must have CM == 0x8.
    //
    // The lowest 5 bits of FLG is FCHECK.
    // FCHECK must be such that the two header bytes are a multiple of 31 when
    // interpreted as a big endian 16-bit number.

    // Emit the header as CMF followed by FLG by extracting the bytes
    // explicitly. Going through Endian::big() + prefixToStringLE() only gives
    // this order on little-endian hosts; on a big-endian host Endian::big() is
    // the identity and the two bytes would come out swapped.
    auto const headerBytes = [](uint16_t header) {
      std::string bytes(2, '\0');
      bytes[0] = static_cast<char>(header >> 8);
      bytes[1] = static_cast<char>(header & 0xFF);
      return bytes;
    };

    std::vector<std::string> result;
    // 16 values for the first byte, 8 values for the second byte.
    // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
    result.reserve(132);
    // Select all values for the CMF byte that use the deflate algorithm 0x8.
    for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
      // Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
      for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
        uint16_t prefix = first | second;
        // Compute FCHECK.
        prefix += 31 - (prefix % 31);
        result.push_back(headerBytes(prefix));
        // zlib won't produce this, but it is a valid prefix.
        if ((prefix & 0x1F) == 31) {
          prefix -= 31;
          result.push_back(headerBytes(prefix));
        }
      }
    }
    return result;
  } else if (type() == CodecType::GZIP) {
    // The gzip frame starts with 2 magic bytes.
    return {prefixToStringLE(kGZIPMagicLE)};
  } else {
    return {};
  }
}
bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
const {
if (type() == CodecType::ZLIB) {
uint16_t value;
Cursor cursor{data};
if (!cursor.tryReadBE(value)) {
return false;
}
// zlib compressed if using deflate and is a multiple of 31.
return (value & 0x0F00) == 0x0800 && value % 31 == 0;
} else if (type() == CodecType::GZIP) {
return dataStartsWithLE(data, kGZIPMagicLE);
} else {
return false;
}
}
uint64_t ZlibStreamCodec::doMaxCompressedLength(
uint64_t uncompressedLength) const {
return deflateBound(nullptr, uncompressedLength);
}
/**
 * Builds a ZlibStreamCodec and hands it back through the generic Codec
 * interface. Propagates any exception thrown by the constructor.
 */
std::unique_ptr<Codec> ZlibStreamCodec::createCodec(
    Options options,
    int level) {
  std::unique_ptr<Codec> codec =
      std::make_unique<ZlibStreamCodec>(options, level);
  return codec;
}
/**
 * Builds a ZlibStreamCodec and hands it back through the StreamCodec
 * interface. Propagates any exception thrown by the constructor.
 */
std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
    Options options,
    int level) {
  std::unique_ptr<StreamCodec> stream =
      std::make_unique<ZlibStreamCodec>(options, level);
  return stream;
}
/**
 * Validates and stores the compression level and zlib options.
 *
 * The codec type reported to the base class is derived from `options` (see
 * getCodecType()). No zlib stream is allocated here.
 *
 * Throws std::invalid_argument if the level, windowSize, memLevel or strategy
 * is out of range.
 */
ZlibStreamCodec::ZlibStreamCodec(Options options, int level)
    : StreamCodec(getCodecType(options)), options_(options) {
  // Translate folly's symbolic levels into zlib levels. This is an else-if
  // chain so a level is remapped at most once, exactly like a switch.
  if (level == COMPRESSION_LEVEL_FASTEST) {
    level = 1;
  } else if (level == COMPRESSION_LEVEL_DEFAULT) {
    level = Z_DEFAULT_COMPRESSION;
  } else if (level == COMPRESSION_LEVEL_BEST) {
    level = 9;
  }
  bool const levelOk =
      level == Z_DEFAULT_COMPRESSION || (level >= 0 && level <= 9);
  if (!levelOk) {
    throw std::invalid_argument(
        to<std::string>("ZlibStreamCodec: invalid level: ", level));
  }
  level_ = level;

  // Although zlib allows a windowSize of 8..15, a value of 8 is not
  // properly supported and is treated as a value of 9. This means data deflated
  // with windowSize==8 can not be re-inflated with windowSize==8. windowSize==8
  // is also not supported for gzip and raw deflation.
  // Hence, the codec supports only 9..15.
  if (options_.windowSize < 9 || options_.windowSize > 15) {
    throw std::invalid_argument(to<std::string>(
        "ZlibStreamCodec: invalid windowSize option: ", options.windowSize));
  }
  if (options_.memLevel < 1 || options_.memLevel > 9) {
    throw std::invalid_argument(to<std::string>(
        "ZlibStreamCodec: invalid memLevel option: ", options.memLevel));
  }
  if (!isValidStrategy(options_.strategy)) {
    throw std::invalid_argument(to<std::string>(
        "ZlibStreamCodec: invalid strategy: ", options.strategy));
  }
}
/**
 * Releases whichever zlib streams were created during the codec's lifetime.
 */
ZlibStreamCodec::~ZlibStreamCodec() {
  if (auto* deflater = deflateStream_.get_pointer()) {
    deflateEnd(deflater);
    deflateStream_.clear();
  }
  if (auto* inflater = inflateStream_.get_pointer()) {
    inflateEnd(inflater);
    inflateStream_.clear();
  }
}
// Marks the zlib state as stale. The actual reset is deferred: the next
// doCompressStream() / doUncompressStream() call resets (or creates) the
// stream it needs before using it.
void ZlibStreamCodec::doResetStream() {
needReset_ = true;
}
void ZlibStreamCodec::resetDeflateStream() {
if (deflateStream_) {
int const rc = deflateReset(deflateStream_.get_pointer());
if (rc != Z_OK) {
deflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
}
return;
}
deflateStream_ = z_stream{};
// The automatic header detection format is only for inflation.
// Use zlib for deflation if the format is auto.
int const windowBits = getWindowBits(
options_.format == Options::Format::AUTO ? Options::Format::ZLIB
: options_.format,
options_.windowSize);
int const rc = deflateInit2(
deflateStream_.get_pointer(),
level_,
Z_DEFLATED,
windowBits,
options_.memLevel,
options_.strategy);
if (rc != Z_OK) {
deflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
}
}
void ZlibStreamCodec::resetInflateStream() {
if (inflateStream_) {
int const rc = inflateReset(inflateStream_.get_pointer());
if (rc != Z_OK) {
inflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
}
return;
}
inflateStream_ = z_stream{};
int const rc = inflateInit2(
inflateStream_.get_pointer(),
getWindowBits(options_.format, options_.windowSize));
if (rc != Z_OK) {
inflateStream_.clear();
throw std::runtime_error(
to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
}
}
/**
 * Maps a StreamCodec flush operation onto the matching zlib flush constant:
 * NONE -> Z_NO_FLUSH, FLUSH -> Z_SYNC_FLUSH, END -> Z_FINISH.
 * Throws std::invalid_argument for any other value.
 */
static int zlibTranslateFlush(StreamCodec::FlushOp flush) {
  if (flush == StreamCodec::FlushOp::NONE) {
    return Z_NO_FLUSH;
  }
  if (flush == StreamCodec::FlushOp::FLUSH) {
    return Z_SYNC_FLUSH;
  }
  if (flush == StreamCodec::FlushOp::END) {
    return Z_FINISH;
  }
  throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
}
/**
 * Passes through the zlib return codes the streaming loop treats as
 * non-fatal (Z_OK, Z_BUF_ERROR, Z_STREAM_END) and converts every other code
 * into a std::runtime_error.
 */
static int zlibThrowOnError(int rc) {
  bool const nonFatal = rc == Z_OK || rc == Z_BUF_ERROR || rc == Z_STREAM_END;
  if (!nonFatal) {
    throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
  }
  return rc;
}
/**
 * Runs one deflate() step over `input`, writing into `output`.
 *
 * Both ranges are advanced past the bytes zlib consumed / produced, even if
 * an exception is thrown after the stream fields were set up.
 *
 * Returns:
 *   - FlushOp::NONE:  always false.
 *   - FlushOp::FLUSH: true once all input was consumed and output space is
 *                     left over.
 *   - FlushOp::END:   true once zlib reports Z_STREAM_END.
 * Also returns false, without touching zlib, when `output` has no buffer.
 *
 * Throws std::runtime_error on zlib errors (see zlibThrowOnError()) and
 * std::invalid_argument for an unknown flush operation.
 */
bool ZlibStreamCodec::doCompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) {
// Lazily (re)initialize the deflate stream after a doResetStream().
if (needReset_) {
resetDeflateStream();
needReset_ = false;
}
DCHECK(deflateStream_.hasValue());
// zlib will return Z_STREAM_ERROR if output.data() is null.
if (output.data() == nullptr) {
return false;
}
// NOTE(review): avail_in / avail_out are zlib's uInt; presumably narrower
// than size_t on 64-bit builds -- confirm callers never pass ranges that
// would be truncated by these assignments.
deflateStream_->next_in = const_cast<uint8_t*>(input.data());
deflateStream_->avail_in = input.size();
deflateStream_->next_out = output.data();
deflateStream_->avail_out = output.size();
// Registered before deflate() runs so the ranges are advanced on every exit
// path, including exceptions.
SCOPE_EXIT {
input.uncheckedAdvance(input.size() - deflateStream_->avail_in);
output.uncheckedAdvance(output.size() - deflateStream_->avail_out);
};
int const rc = zlibThrowOnError(
deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush)));
switch (flush) {
case StreamCodec::FlushOp::NONE:
return false;
case StreamCodec::FlushOp::FLUSH:
// The flush is complete only if zlib was not stopped by a full output.
return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0;
case StreamCodec::FlushOp::END:
return rc == Z_STREAM_END;
default:
throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
}
}
/**
 * Runs one inflate() step over `input`, writing into `output`.
 *
 * Both ranges are advanced past the bytes zlib consumed / produced, even if
 * an exception is thrown after the stream fields were set up.
 *
 * Returns true once zlib reports Z_STREAM_END, false otherwise (including
 * when `output` has no buffer, in which case zlib is not called).
 *
 * Throws std::runtime_error on zlib errors (see zlibThrowOnError()) and
 * std::invalid_argument for an unknown flush operation.
 */
bool ZlibStreamCodec::doUncompressStream(
ByteRange& input,
MutableByteRange& output,
StreamCodec::FlushOp flush) {
// Lazily (re)initialize the inflate stream after a doResetStream().
if (needReset_) {
resetInflateStream();
needReset_ = false;
}
DCHECK(inflateStream_.hasValue());
// zlib will return Z_STREAM_ERROR if output.data() is null.
if (output.data() == nullptr) {
return false;
}
// NOTE(review): avail_in / avail_out are zlib's uInt; presumably narrower
// than size_t on 64-bit builds -- confirm callers never pass ranges that
// would be truncated by these assignments.
inflateStream_->next_in = const_cast<uint8_t*>(input.data());
inflateStream_->avail_in = input.size();
inflateStream_->next_out = output.data();
inflateStream_->avail_out = output.size();
// Registered before inflate() runs so the ranges are advanced on every exit
// path, including exceptions. Unlike doCompressStream(), this uses the
// checked advance().
SCOPE_EXIT {
input.advance(input.size() - inflateStream_->avail_in);
output.advance(output.size() - inflateStream_->avail_out);
};
int const rc = zlibThrowOnError(
inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush)));
return rc == Z_STREAM_END;
}
} // namespace
// Options with the GZIP format and every other field left at its default.
Options defaultGzipOptions() {
  Options gzipOptions(Options::Format::GZIP);
  return gzipOptions;
}
// Options with the ZLIB format and every other field left at its default.
Options defaultZlibOptions() {
  Options zlibOptions(Options::Format::ZLIB);
  return zlibOptions;
}
// Public entry point: creates a zlib-backed Codec for the given options and
// level. Throws std::invalid_argument if either is rejected by the codec.
std::unique_ptr<Codec> getCodec(Options options, int level) {
  return std::make_unique<ZlibStreamCodec>(options, level);
}
// Public entry point: creates a zlib-backed StreamCodec for the given options
// and level. Throws std::invalid_argument if either is rejected by the codec.
std::unique_ptr<StreamCodec> getStreamCodec(Options options, int level) {
  return std::make_unique<ZlibStreamCodec>(options, level);
}
} // namespace zlib
} // namespace io
} // namespace folly
#endif // FOLLY_HAVE_LIBZ
/*
* Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <folly/Portability.h>
#include <folly/io/Compression.h>
#if FOLLY_HAVE_LIBZ
#include <zlib.h>
/**
* Interface for Zlib-specific codec initialization.
*/
namespace folly {
namespace io {
namespace zlib {
/**
 * Parameters used to initialize a zlib-backed codec: the wrapper format plus
 * the windowSize, memLevel and strategy values described on each field.
 */
struct Options {
/**
* ZLIB: default option -- write a zlib wrapper as documented in RFC 1950.
*
* GZIP: write a simple gzip header and trailer around the compressed data
* instead of a zlib wrapper.
*
* RAW: deflate will generate raw deflate data with no zlib header or
* trailer, and will not compute a check value.
*
* AUTO: enable automatic header detection for decoding gzip or zlib data.
* For deflation, ZLIB will be used.
*/
enum class Format { ZLIB, GZIP, RAW, AUTO };
/**
* Every argument is optional; the defaults are the ZLIB format, a windowSize
* of 15, a memLevel of 8 and Z_DEFAULT_STRATEGY. No validation happens here.
*/
explicit Options(
Format format = Format::ZLIB,
int windowSize = 15,
int memLevel = 8,
int strategy = Z_DEFAULT_STRATEGY)
: format(format),
windowSize(windowSize),
memLevel(memLevel),
strategy(strategy) {}
/**
* Wrapper format written when compressing and expected when uncompressing;
* see Format.
*/
Format format;
/**
* windowSize is the base two logarithm of the window size (the size of the
* history buffer). It should be in the range 9..15. Larger values of this
* parameter result in better compression at the expense of memory usage.
*
* The default value is 15.
*
* NB: when inflating/uncompressing data, the windowSize must be greater than
* or equal to the size used when deflating/compressing.
*/
int windowSize;
/**
* "The memLevel parameter specifies how much memory should be allocated for
* the internal compression state. memLevel=1 uses minimum memory but is slow
* and reduces compression ratio; memLevel=9 uses maximum memory for optimal
* speed. The default value is 8."
*/
int memLevel;
/**
* The strategy parameter is used to tune the compression algorithm.
* Supported values:
* - Z_DEFAULT_STRATEGY: normal data
* - Z_FILTERED: data produced by a filter (or predictor)
* - Z_HUFFMAN_ONLY: force Huffman encoding only (no string match)
* - Z_RLE: limit match distances to one
* - Z_FIXED: prevents the use of dynamic Huffman codes
*
* The strategy parameter only affects the compression ratio but not the
* correctness of the compressed output.
*/
int strategy;
};
/**
* Get the default options for gzip compression.
* A codec created with these options will have type CodecType::GZIP.
*/
Options defaultGzipOptions();
/**
* Get the default options for zlib compression.
* A codec created with these options will have type CodecType::ZLIB.
*/
Options defaultZlibOptions();
/**
* Get a codec with the given options and compression level.
*
* If the windowSize is 15 and the format is Format::ZLIB or Format::GZIP, then
* the type of the codec will be CodecType::ZLIB or CodecType::GZIP
* respectively. Otherwise, the type will be CodecType::USER_DEFINED.
*
* Automatic uncompression is not supported with USER_DEFINED codecs.
*
* Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
*/
std::unique_ptr<Codec> getCodec(
Options options = Options(),
int level = COMPRESSION_LEVEL_DEFAULT);
std::unique_ptr<StreamCodec> getStreamCodec(
Options options = Options(),
int level = COMPRESSION_LEVEL_DEFAULT);
} // namespace zlib
} // namespace io
} // namespace folly
#endif // FOLLY_HAVE_LIBZ
......@@ -38,6 +38,12 @@
#include <zstd.h>
#endif
#if FOLLY_HAVE_LIBZ
#include <folly/io/compression/Zlib.h>
#endif
namespace zlib = folly::io::zlib;
namespace folly {
namespace io {
namespace test {
......@@ -1129,6 +1135,118 @@ TEST(ZstdTest, BackwardCompatible) {
}
#endif
#if FOLLY_HAVE_LIBZ
using ZlibFormat = zlib::Options::Format;
// A codec created with Format::AUTO must be able to uncompress data produced
// by both the stock ZLIB codec and the stock GZIP codec.
TEST(ZlibTest, Auto) {
  size_t const length = size_t(1) << 15;
  auto const bytes = randomDataHolder.data(length);
  std::string const original(
      reinterpret_cast<const char*>(bytes.data()), length);
  auto optionCodec = zlib::getCodec(zlib::Options(ZlibFormat::AUTO));

  auto const expectAutoUncompresses = [&](CodecType type) {
    auto codec = getCodec(type);
    auto const compressed = codec->compress(original);
    auto const uncompressed = optionCodec->uncompress(compressed);
    EXPECT_EQ(original, uncompressed);
  };

  // Test the codec can uncompress zlib data.
  expectAutoUncompresses(CodecType::ZLIB);
  // Test the codec can uncompress gzip data.
  expectAutoUncompresses(CodecType::GZIP);
}
// Data compressed by a codec built from the default zlib / gzip options must
// be readable both by the stock codec of the same type and by the
// option-built codec itself.
TEST(ZlibTest, DefaultOptions) {
  size_t const length = size_t(1) << 20;
  auto const bytes = randomDataHolder.data(length);
  std::string const original(
      reinterpret_cast<const char*>(bytes.data()), length);

  auto const expectInterop = [&](CodecType type, zlib::Options options) {
    auto codec = getCodec(type);
    auto optionCodec = zlib::getCodec(options);
    auto const compressed = optionCodec->compress(original);
    auto uncompressed = codec->uncompress(compressed);
    EXPECT_EQ(original, uncompressed);
    uncompressed = optionCodec->uncompress(compressed);
    EXPECT_EQ(original, uncompressed);
  };

  expectInterop(CodecType::ZLIB, zlib::defaultZlibOptions());
  expectInterop(CodecType::GZIP, zlib::defaultGzipOptions());
}
// Fixture parameterized over (format, windowSize, memLevel, strategy); each
// test instance gets a stream codec built from that combination.
class ZlibOptionsTest : public testing::TestWithParam<
                            std::tr1::tuple<ZlibFormat, int, int, int>> {
 protected:
  void SetUp() override {
    auto const params = GetParam();
    options_ = zlib::Options(
        std::tr1::get<0>(params),
        std::tr1::get<1>(params),
        std::tr1::get<2>(params),
        std::tr1::get<3>(params));
    codec_ = zlib::getStreamCodec(options_);
  }

  // Compresses and uncompresses a fixed-size sample from `dh` and expects the
  // original bytes back.
  void runSimpleRoundTripTest(const DataHolder& dh);

 private:
  zlib::Options options_;
  std::unique_ptr<StreamCodec> codec_;
};
// Round-trips 64 KiB taken from `dh` through the fixture's codec and expects
// the uncompressed result to equal the input.
void ZlibOptionsTest::runSimpleRoundTripTest(const DataHolder& dh) {
  size_t const length = size_t(1) << 16;
  auto const bytes = dh.data(length);
  std::string const original(
      reinterpret_cast<const char*>(bytes.data()), length);
  auto const compressed = codec_->compress(original);
  auto const uncompressed = codec_->uncompress(compressed);
  EXPECT_EQ(uncompressed, original);
}
// For every parameter combination, round-trip both the constant and the
// random data holders.
TEST_P(ZlibOptionsTest, simpleRoundTripTest) {
runSimpleRoundTripTest(constantDataHolder);
runSimpleRoundTripTest(randomDataHolder);
}
// Cartesian product of the four option axes, in the order the fixture's
// SetUp() unpacks them: format, windowSize, memLevel, strategy.
INSTANTIATE_TEST_CASE_P(
ZlibOptionsTest,
ZlibOptionsTest,
testing::Combine(
// format
testing::Values(
ZlibFormat::ZLIB,
ZlibFormat::GZIP,
ZlibFormat::RAW,
ZlibFormat::AUTO),
// windowSize: both ends of the supported 9..15 range plus a middle value
testing::Values(9, 12, 15),
// memLevel: both ends of the supported 1..9 range plus the default of 8
testing::Values(1, 8, 9),
// strategy
testing::Values(
Z_DEFAULT_STRATEGY,
Z_FILTERED,
Z_HUFFMAN_ONLY,
Z_RLE,
Z_FIXED)));
#endif // FOLLY_HAVE_LIBZ
} // namespace test
} // namespace io
} // namespace folly
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment