Commit cf316095, authored by Giuseppe Ottaviano, committed by Facebook GitHub Bot

Fix overflow in EliasFanoReader

Summary:
The upper bitvector size (in bits) can exceed the domain of `SizeType` even if the list otherwise respects the contract.

Also improve some of the tests.

Reviewed By: yfeldblum, philippv

Differential Revision: D22304614

fbshipit-source-id: bf846730c59451457d3e2d3cf14d90e3df012e0b
parent 77a38ddb
......@@ -87,8 +87,8 @@ struct EliasFanoCompressedListBase {
Pointer upper = nullptr;
};
typedef EliasFanoCompressedListBase<const uint8_t*> EliasFanoCompressedList;
typedef EliasFanoCompressedListBase<uint8_t*> MutableEliasFanoCompressedList;
using EliasFanoCompressedList = EliasFanoCompressedListBase<const uint8_t*>;
using MutableEliasFanoCompressedList = EliasFanoCompressedListBase<uint8_t*>;
template <
class Value,
......@@ -102,11 +102,12 @@ struct EliasFanoEncoderV2 {
std::is_integral<Value>::value && std::is_unsigned<Value>::value,
"Value should be unsigned integral");
typedef EliasFanoCompressedList CompressedList;
typedef MutableEliasFanoCompressedList MutableCompressedList;
using CompressedList = EliasFanoCompressedList;
using MutableCompressedList = MutableEliasFanoCompressedList;
using ValueType = Value;
using SkipValueType = SkipValue;
typedef Value ValueType;
typedef SkipValue SkipValueType;
struct Layout;
static constexpr size_t skipQuantum = kSkipQuantum;
......@@ -373,7 +374,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
SkipPointers<Encoder::skipQuantum>(list.skipPointers),
start_(list.upper),
size_(list.size),
upperBound_(8 * list.upperSizeBytes - size_) {
upperBound_(estimateUpperBound(list)) {
reset();
}
......@@ -506,7 +507,8 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
skip -= cnt;
position_ += kBitsPerBlock - cnt;
outer_ += sizeof(block_t);
DCHECK_LT(outer_, (static_cast<size_t>(upperBound_) + size()) / 8);
DCHECK_LT(outer_, (static_cast<size_t>(upperBound_) + size() + 7) / 8)
<< upperBound_ << " " << size() << " " << v;
block_ = folly::loadUnaligned<block_t>(start_ + outer_);
}
......@@ -590,22 +592,34 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
}
private:
using block_t = uint64_t;
// The size in bytes of the upper bits is limited by n + universe / 8,
// so a type that can hold either sizes or values is sufficient.
using OuterType = typename std::common_type<ValueType, SizeType>::type;
// Computes the bound used to initialize upperBound_ from the list's layout
// fields. The raw estimate is the upper bitvector length in bits
// (8 * upperSizeBytes) minus the number of elements; because the bitvector
// is byte-aligned this can overestimate, so the result is clamped to the
// largest upper part that still fits in ValueType once numLowerBits low bits
// are accounted for.
static ValueType estimateUpperBound(
    const typename Encoder::CompressedList& list) {
  // Done in size_t: the bit count may not fit in the narrower size/value
  // types.
  const size_t rawEstimate = 8 * list.upperSizeBytes - list.size;
  const size_t largestUpperPart =
      std::numeric_limits<ValueType>::max() >> list.numLowerBits;
  const size_t clamped = std::min<size_t>(rawEstimate, largestUpperPart);
  return static_cast<ValueType>(clamped);
}
// Stores into value_ the absolute bit index (8 * outer_ + inner, with outer_
// being a byte offset) minus position_, narrowed to ValueType.
// Always returns true.
FOLLY_ALWAYS_INLINE bool setValue(size_t inner) {
  const auto bitIndex = 8 * outer_ + inner;
  value_ = static_cast<ValueType>(bitIndex - position_);
  return true;
}
FOLLY_ALWAYS_INLINE void reposition(SizeType dest) {
// dest is a position in the bit vector, so SizeType may not be
// sufficient here.
FOLLY_ALWAYS_INLINE void reposition(size_t dest) {
  // Split the bit position into a byte offset and a bit offset within that
  // byte.
  const size_t bitInByte = dest % 8;
  outer_ = dest / 8;
  // Mask that clears every bit below bitInByte in the freshly loaded block.
  const block_t keepFromDest = ~((block_t(1) << bitInByte) - 1);
  block_ = folly::loadUnaligned<block_t>(start_ + outer_);
  block_ &= keepFromDest;
}
using block_t = uint64_t;
// The size in bytes of the upper bits is limited by n + universe / 8,
// so a type that can hold either sizes or values is sufficient.
using OuterType = typename std::common_type<ValueType, SizeType>::type;
FOLLY_ALWAYS_INLINE void
getPreviousInfo(block_t& block, size_t& inner, OuterType& outer) const {
DCHECK_NE(position(), std::numeric_limits<SizeType>::max());
......@@ -647,8 +661,8 @@ template <
class SizeType = typename Encoder::SkipValueType>
class EliasFanoReader {
public:
typedef Encoder EncoderType;
typedef typename Encoder::ValueType ValueType;
using EncoderType = Encoder;
using ValueType = typename Encoder::ValueType;
explicit EliasFanoReader(const typename Encoder::CompressedList& list)
: upper_(list), lower_(list.lower), numLowerBits_(list.numLowerBits) {
......@@ -774,7 +788,8 @@ class EliasFanoReader {
return true;
}
// We might be in the middle of a run, iterate backwards to the beginning.
// We might be in the middle of a run of equal values, reposition by
// iterating backwards to its first element.
auto valueLower = Instructions::bzhi(value_, numLowerBits_);
while (!upper_.isAtBeginningOfRun() &&
readLowerPart(position() - 1) == valueLower) {
......
......@@ -249,6 +249,7 @@ void testSkipTo(const std::vector<uint64_t>& data, const List& list) {
testSkipTo<Reader, List>(data, list, steps);
}
testSkipTo<Reader, List>(data, list, std::numeric_limits<size_t>::max());
{
// Skip to the first element.
Reader reader(list);
......@@ -256,29 +257,32 @@ void testSkipTo(const std::vector<uint64_t>& data, const List& list) {
EXPECT_EQ(reader.value(), data[0]);
EXPECT_EQ(reader.position(), 0);
}
{
// Skip past the last element.
Reader reader(list);
EXPECT_FALSE(reader.skipTo(data.back() + 1));
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
}
{
// Skip to maximum integer.
Reader reader(list);
using ValueType = typename Reader::ValueType;
EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max()));
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
}
// Skip past the last element and before the upperBound.
// Skip past the last element.
using ValueType = typename Reader::ValueType;
if (const auto upperBound = getUniverseUpperBound<ValueType>(list);
upperBound && *upperBound != data.back()) {
std::vector<ValueType> valuesPastTheEnd = {
// max() is not representable, so both values are past the end.
static_cast<ValueType>(data.back() + 1),
std::numeric_limits<ValueType>::max(),
};
// Exercise skipping past the last element but before the inferred upper
// bound.
if (const auto upperBound =
getUniverseUpperBound<ValueType>(list).value_or(data.back() + 1);
upperBound != data.back()) {
ValueType base = data.back() + 1;
for (ValueType value = base + 1;
// Stop for overflow.
value > base && value < upperBound;
value += value - base) {
valuesPastTheEnd.push_back(value);
}
}
for (auto value : valuesPastTheEnd) {
Reader reader(list);
EXPECT_FALSE(reader.skipTo(*upperBound));
EXPECT_FALSE(reader.skipTo(value));
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
......
......@@ -34,16 +34,16 @@ namespace compression {
// Overload to help CodingTestUtils retrieve the universe upperbound
// of the list for certain test cases.
template <typename ValueType, typename T>
folly::Optional<std::size_t> getUniverseUpperBound(
folly::Optional<ValueType> getUniverseUpperBound(
const EliasFanoCompressedListBase<T>& list) {
constexpr ValueType maxUpperValue = std::numeric_limits<ValueType>::max();
const ValueType maxUpperBits = maxUpperValue >> list.numLowerBits;
const ValueType upperBitsUniverse = std::min(
static_cast<ValueType>(8 * list.upperSizeBytes - list.size),
maxUpperBits);
constexpr size_t kMaxUpperValue = std::numeric_limits<ValueType>::max();
const size_t maxUpperBits = kMaxUpperValue >> list.numLowerBits;
const ValueType upperBitsUniverse = static_cast<ValueType>(
std::min(8 * list.upperSizeBytes - list.size, maxUpperBits));
return (upperBitsUniverse << list.numLowerBits) |
((1 << list.numLowerBits) - 1);
((ValueType(1) << list.numLowerBits) - 1);
}
} // namespace compression
} // namespace folly
......@@ -134,7 +134,7 @@ class EliasFanoCodingTest : public ::testing::Test {
testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
// max() cannot be read, as it is assumed an invalid value.
// TODO(ott): It should be possible to lift this constraint.
testAll<Reader, Encoder>({0, 1, std::numeric_limits<uint32_t>::max() - 1});
testAll<Reader, Encoder>({0, 1, std::numeric_limits<ValueType>::max() - 1});
// Test data with additional trailing 0s in the upperBits by extending
// the upper bound.
constexpr uint64_t minUpperBoundExtension = 2;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment