Commit 9f160366, authored by Philip Pronin, committed by Sara Golemon

EliasFanoReader::goTo()

Summary: Random lookup support.

Test Plan: fbconfig -r folly/experimental/test:eliasfano_test && fbmake runtests_opt -j32

@override-unit-failures

Reviewed By: soren@fb.com

FB internal diff: D1473244

Tasks: 4536072
parent 874ef580
...@@ -310,12 +310,17 @@ class UpperBitsReader { ...@@ -310,12 +310,17 @@ class UpperBitsReader {
explicit UpperBitsReader(const EliasFanoCompressedList& list) explicit UpperBitsReader(const EliasFanoCompressedList& list)
: forwardPointers_(list.forwardPointers.data()), : forwardPointers_(list.forwardPointers.data()),
skipPointers_(list.skipPointers.data()), skipPointers_(list.skipPointers.data()),
start_(list.upper.data()), start_(list.upper.data()) {
block_(start_ != nullptr ? folly::loadUnaligned<block_t>(start_) : 0), reset();
outer_(0), // outer offset: number of consumed bytes in upper. }
inner_(-1), // inner offset: (bit) position in current block.
position_(-1), // index of current value (= #reads - 1). void reset() {
value_(0) { } block_ = start_ != nullptr ? folly::loadUnaligned<block_t>(start_) : 0;
outer_ = 0;
inner_ = -1;
position_ = -1;
value_ = 0;
}
size_t position() const { return position_; } size_t position() const { return position_; }
ValueType value() const { return value_; } ValueType value() const { return value_; }
...@@ -437,6 +442,15 @@ class UpperBitsReader { ...@@ -437,6 +442,15 @@ class UpperBitsReader {
return value_; return value_;
} }
// Random-access entry point: moves the reader to element n by delegating to
// skip(n) after preparing the reader state, and returns whatever skip(n)
// returns.
ValueType goTo(size_t n) {
  // Forward pointers are only usable when the encoder emits them and the
  // target lies past the first quantum.
  const bool jumpViaForwardPointer =
      Encoder::forwardQuantum != 0 && n > Encoder::forwardQuantum;
  if (jumpViaForwardPointer) {
    // Avoid reading the head: only the position is rewound here, and skip()
    // will call reposition() to set up the rest of the state.
    position_ = -1;
  } else {
    // Target is near the head (or there are no forward pointers): restart
    // from the beginning.
    reset();
  }
  return skip(n);
}
private: private:
ValueType setValue() { ValueType setValue() {
value_ = static_cast<ValueType>(8 * outer_ + inner_ - position_); value_ = static_cast<ValueType>(8 * outer_ + inner_ - position_);
...@@ -454,9 +468,9 @@ class UpperBitsReader { ...@@ -454,9 +468,9 @@ class UpperBitsReader {
const unsigned char* const skipPointers_; const unsigned char* const skipPointers_;
const unsigned char* const start_; const unsigned char* const start_;
block_t block_; block_t block_;
size_t outer_; size_t outer_; // Outer offset: number of consumed bytes in upper.
size_t inner_; size_t inner_; // Inner offset: (bit) position in current block.
size_t position_; size_t position_; // Index of current value (= #reads - 1).
ValueType value_; ValueType value_;
}; };
...@@ -466,14 +480,13 @@ template <class Encoder, ...@@ -466,14 +480,13 @@ template <class Encoder,
class Instructions = instructions::Default> class Instructions = instructions::Default>
class EliasFanoReader : private boost::noncopyable { class EliasFanoReader : private boost::noncopyable {
public: public:
typedef Encoder EncoderType;
typedef typename Encoder::ValueType ValueType; typedef typename Encoder::ValueType ValueType;
explicit EliasFanoReader(const EliasFanoCompressedList& list) explicit EliasFanoReader(const EliasFanoCompressedList& list)
: list_(list), : list_(list),
lowerMask_((ValueType(1) << list_.numLowerBits) - 1), lowerMask_((ValueType(1) << list_.numLowerBits) - 1),
upper_(list), upper_(list_) {
progress_(0),
value_(0) {
DCHECK(Instructions::supported()); DCHECK(Instructions::supported());
// To avoid extra branching during skipTo() while reading // To avoid extra branching during skipTo() while reading
// upper sequence we need to know the last element. // upper sequence we need to know the last element.
...@@ -508,11 +521,10 @@ class EliasFanoReader : private boost::noncopyable { ...@@ -508,11 +521,10 @@ class EliasFanoReader : private boost::noncopyable {
bool skip(size_t n) { bool skip(size_t n) {
CHECK_GT(n, 0); CHECK_GT(n, 0);
progress_ += n - 1; progress_ += n;
if (LIKELY(progress_ < list_.size)) { if (LIKELY(progress_ <= list_.size)) {
value_ = readLowerPart(progress_) | value_ = readLowerPart(progress_ - 1) |
(upper_.skip(n) << list_.numLowerBits); (upper_.skip(n) << list_.numLowerBits);
++progress_;
return true; return true;
} }
...@@ -546,8 +558,25 @@ class EliasFanoReader : private boost::noncopyable { ...@@ -546,8 +558,25 @@ class EliasFanoReader : private boost::noncopyable {
return true; return true;
} }
bool goTo(size_t n) {
if (LIKELY(n - 1 < list_.size)) { // n > 0 && n <= list_.size
progress_ = n;
value_ = readLowerPart(n - 1) | (upper_.goTo(n) << list_.numLowerBits);
return true;
} else if (n == 0) {
upper_.reset();
progress_ = 0;
value_ = 0;
return true;
}
progress_ = list_.size;
value_ = std::numeric_limits<ValueType>::max();
return false;
}
private: private:
ValueType readLowerPart(size_t i) const { ValueType readLowerPart(size_t i) const {
DCHECK_LT(i, list_.size);
const size_t pos = i * list_.numLowerBits; const size_t pos = i * list_.numLowerBits;
const unsigned char* ptr = list_.lower.data() + (pos / 8); const unsigned char* ptr = list_.lower.data() + (pos / 8);
const uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr); const uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr);
...@@ -557,8 +586,8 @@ class EliasFanoReader : private boost::noncopyable { ...@@ -557,8 +586,8 @@ class EliasFanoReader : private boost::noncopyable {
const EliasFanoCompressedList list_; const EliasFanoCompressedList list_;
const ValueType lowerMask_; const ValueType lowerMask_;
detail::UpperBitsReader<Encoder, Instructions> upper_; detail::UpperBitsReader<Encoder, Instructions> upper_;
size_t progress_; size_t progress_ = 0;
ValueType value_; ValueType value_ = 0;
ValueType lastValue_; ValueType lastValue_;
}; };
......
...@@ -29,13 +29,13 @@ ...@@ -29,13 +29,13 @@
namespace folly { namespace compression { namespace folly { namespace compression {
std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) { template <class URNG>
std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId, URNG&& g) {
CHECK_LT(n, 2 * maxId); CHECK_LT(n, 2 * maxId);
std::mt19937 gen;
std::uniform_int_distribution<> uid(1, maxId); std::uniform_int_distribution<> uid(1, maxId);
std::unordered_set<uint32_t> dataset; std::unordered_set<uint32_t> dataset;
while (dataset.size() < n) { while (dataset.size() < n) {
uint32_t value = uid(gen); uint32_t value = uid(g);
if (dataset.count(value) == 0) { if (dataset.count(value) == 0) {
dataset.insert(value); dataset.insert(value);
} }
...@@ -46,8 +46,13 @@ std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) { ...@@ -46,8 +46,13 @@ std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
return ids; return ids;
} }
std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId, inline std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
uint32_t step = 1) { std::mt19937 gen;
return generateRandomList(n, maxId, gen);
}
inline std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId,
uint32_t step = 1) {
CHECK_LE(minId, maxId); CHECK_LE(minId, maxId);
CHECK_GT(step, 0); CHECK_GT(step, 0);
std::vector<uint32_t> ids; std::vector<uint32_t> ids;
...@@ -58,7 +63,7 @@ std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId, ...@@ -58,7 +63,7 @@ std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId,
return ids; return ids;
} }
std::vector<uint32_t> loadList(const std::string& filename) { inline std::vector<uint32_t> loadList(const std::string& filename) {
std::ifstream fin(filename); std::ifstream fin(filename);
std::vector<uint32_t> result; std::vector<uint32_t> result;
uint32_t id; uint32_t id;
...@@ -126,7 +131,6 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list, ...@@ -126,7 +131,6 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list,
EXPECT_EQ(reader.value(), *it); EXPECT_EQ(reader.value(), *it);
value = reader.value() + delta; value = reader.value() + delta;
} }
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max()); EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.next()); EXPECT_FALSE(reader.next());
} }
...@@ -148,6 +152,29 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list) { ...@@ -148,6 +152,29 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
} }
} }
// Checks Reader::goTo() against the reference data: first goTo(0), then
// every index of `data` in a shuffled order (default-seeded std::mt19937, so
// the order is the same on every run), and finally one position past the
// end, which must fail and leave value() at the uint32_t maximum.
template <class Reader, class List>
void testGoTo(const std::vector<uint32_t>& data, const List& list) {
  std::mt19937 gen;

  // Identity permutation 0..data.size()-1, shuffled below.
  std::vector<size_t> indices;
  indices.reserve(data.size());
  for (size_t k = 0; k != data.size(); ++k) {
    indices.push_back(k);
  }
  std::shuffle(indices.begin(), indices.end(), gen);

  // When the encoder has no forward pointers, only the first 100 shuffled
  // positions are probed.
  if (Reader::EncoderType::forwardQuantum == 0) {
    if (indices.size() > 100) {
      indices.resize(100);
    }
  }

  Reader reader(list);
  EXPECT_TRUE(reader.goTo(0));
  EXPECT_EQ(reader.value(), 0);

  for (auto it = indices.begin(); it != indices.end(); ++it) {
    const size_t i = *it;
    EXPECT_TRUE(reader.goTo(i + 1));
    EXPECT_EQ(reader.value(), data[i]);
  }

  EXPECT_FALSE(reader.goTo(data.size() + 1));
  EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
}
template <class Reader, class Encoder> template <class Reader, class Encoder>
void testEmpty() { void testEmpty() {
typename Encoder::CompressedList list; typename Encoder::CompressedList list;
...@@ -176,6 +203,7 @@ void testAll(const std::vector<uint32_t>& data) { ...@@ -176,6 +203,7 @@ void testAll(const std::vector<uint32_t>& data) {
testNext<Reader>(data, list); testNext<Reader>(data, list);
testSkip<Reader>(data, list); testSkip<Reader>(data, list);
testSkipTo<Reader>(data, list); testSkipTo<Reader>(data, list);
testGoTo<Reader>(data, list);
list.free(); list.free();
} }
...@@ -226,6 +254,23 @@ void bmSkipTo(const List& list, const std::vector<uint32_t>& data, ...@@ -226,6 +254,23 @@ void bmSkipTo(const List& list, const std::vector<uint32_t>& data,
} }
} }
// Benchmark driver for Reader::goTo(): walks `order` (indices into `data`)
// repeatedly, jumping to each position and checking the value read against
// `data`. `data` must be non-empty and the same size as `order`.
// Note that a pass over `order` is always completed once started, so the
// number of lookups performed is `iters` rounded up to a multiple of
// order.size().
template <class Reader, class List>
void bmGoTo(const List& list, const std::vector<uint32_t>& data,
            const std::vector<size_t>& order, size_t iters) {
  CHECK(!data.empty());
  CHECK_EQ(data.size(), order.size());

  Reader reader(list);
  size_t lookupsDone = 0;
  while (lookupsDone < iters) {
    for (auto it = order.begin(); it != order.end(); ++it, ++lookupsDone) {
      const size_t j = *it;
      reader.goTo(j + 1);  // goTo() positions are offset by one from indices.
      const uint32_t value = reader.value();
      CHECK_EQ(value, data[j]);
    }
  }
}
}} // namespaces }} // namespaces
#endif // FOLLY_EXPERIMENTAL_CODING_TEST_UTILS_H #endif // FOLLY_EXPERIMENTAL_CODING_TEST_UTILS_H
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
* limitations under the License. * limitations under the License.
*/ */
#include <algorithm>
#include <random>
#include <vector>
#include <folly/Benchmark.h> #include <folly/Benchmark.h>
#include <folly/experimental/EliasFanoCoding.h> #include <folly/experimental/EliasFanoCoding.h>
#include <folly/experimental/test/CodingTestUtils.h> #include <folly/experimental/test/CodingTestUtils.h>
...@@ -76,12 +80,23 @@ typedef EliasFanoEncoder<uint32_t, uint32_t, 128, 128, kVersion> Encoder; ...@@ -76,12 +80,23 @@ typedef EliasFanoEncoder<uint32_t, uint32_t, 128, 128, kVersion> Encoder;
typedef EliasFanoReader<Encoder> Reader; typedef EliasFanoReader<Encoder> Reader;
std::vector<uint32_t> data; std::vector<uint32_t> data;
std::vector<size_t> order;
typename Encoder::CompressedList list; typename Encoder::CompressedList list;
void init() { void init() {
data = generateRandomList(100 * 1000, 10 * 1000 * 1000); std::mt19937 gen;
data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
//data = loadList("/home/philipp/pl_test_dump.txt"); //data = loadList("/home/philipp/pl_test_dump.txt");
Encoder::encode(data.data(), data.size(), bm::list); Encoder::encode(data.data(), data.size(), bm::list);
order.clear();
order.reserve(data.size());
for (size_t i = 0; i < data.size(); ++i) {
order.push_back(i);
}
std::shuffle(order.begin(), order.end(), gen);
} }
void free() { void free() {
...@@ -110,6 +125,10 @@ BENCHMARK(Skip1000_ForwardQ128_1M) { ...@@ -110,6 +125,10 @@ BENCHMARK(Skip1000_ForwardQ128_1M) {
bmSkip<bm::Reader>(bm::list, bm::data, 1000, bm::k1M); bmSkip<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
} }
// Benchmarks random-order goTo() lookups on the shared bm fixture: bmGoTo
// visits the positions listed in bm::order (a shuffled list of indices into
// bm::data, built in bm::init()) and verifies each value read.
// NOTE(review): bm::k1M is presumably the iteration count (1M) - confirm
// against its definition.
BENCHMARK(GoTo_ForwardQ128_1M) {
bmGoTo<bm::Reader>(bm::list, bm::data, bm::order, bm::k1M);
}
BENCHMARK(SkipTo1_SkipQ128_1M) { BENCHMARK(SkipTo1_SkipQ128_1M) {
bmSkipTo<bm::Reader>(bm::list, bm::data, 1, bm::k1M); bmSkipTo<bm::Reader>(bm::list, bm::data, 1, bm::k1M);
} }
...@@ -126,6 +145,25 @@ BENCHMARK(SkipTo1000_SkipQ128_1M) { ...@@ -126,6 +145,25 @@ BENCHMARK(SkipTo1000_SkipQ128_1M) {
bmSkipTo<bm::Reader>(bm::list, bm::data, 1000, bm::k1M); bmSkipTo<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
} }
#if 0
Intel Xeon CPU E5-2660 @ 2.7GHz (turbo on), using instructions::Fast.
============================================================================
folly/experimental/test/EliasFanoCodingTest.cpp relative time/iter iters/s
============================================================================
Next_1M 4.86ms 205.97
Skip1_ForwardQ128_1M 5.17ms 193.36
Skip10_ForwardQ128_1M 13.69ms 73.03
Skip100_ForwardQ128_1M 26.76ms 37.37
Skip1000_ForwardQ128_1M 20.66ms 48.40
GoTo_ForwardQ128_1M 43.75ms 22.86
SkipTo1_SkipQ128_1M 9.74ms 102.70
SkipTo10_SkipQ128_1M 30.62ms 32.66
SkipTo100_SkipQ128_1M 37.70ms 26.53
SkipTo1000_SkipQ128_1M 31.14ms 32.11
============================================================================
#endif
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment