Commit c2c47da2 authored by Giuseppe Ottaviano's avatar Giuseppe Ottaviano Committed by Alecs King

Elias-Fano micro-optimizations

Summary:
Short skips have been optimized by adding special cases that use simple iteration when it is convenient. Large skips have been optimized by using the broadword selection algorithm by Vigna (improved with ideas by Gog&Petri) instead of iterating on the zeros/ones of the upper bits.

The benchmarks had to be made more granular to measure the differences, in particular they used to test skipping with a fixed skip length for each test, while now we average over a range of skips to better simulate a random distribution.

The improvements are very significant for `skipTo()` on short skips, about 2-3x for skips at distance 1 or 2, which can occur when intersecting dense lists. On large skips the gain is about 17%.

`skip()` exhibits slightly smaller improvements.

before               after
============================================================================  ==================
folly/experimental/test/EliasFanoCodingTest.cpp relative  time/iter  iters/s  time/iter  iters/s
============================================================================  ==================
Next                                                         2.52ns  396.26M     2.52ns  397.28M
Skip_ForwardQ128(1)                                          8.66ns  115.52M     3.92ns  255.28M
Skip_ForwardQ128(2)                                          8.37ns  119.42M     5.08ns  197.04M
Skip_ForwardQ128(4_pm_1)                                     9.67ns  103.41M     7.04ns  142.02M
Skip_ForwardQ128(16_pm_4)                                   21.44ns   46.65M    19.68ns   50.82M
Skip_ForwardQ128(64_pm_16)                                  30.86ns   32.40M    27.58ns   36.26M
Skip_ForwardQ128(256_pm_64)                                 37.80ns   26.45M    32.49ns   30.78M
Skip_ForwardQ128(1024_pm_256)                               38.99ns   25.65M    33.39ns   29.95M
Jump_ForwardQ128                                            37.91ns   26.37M    34.05ns   29.37M
----------------------------------------------------------------------------  ------------------
SkipTo_SkipQ128(1)                                          13.87ns   72.10M     4.42ns  226.49M
SkipTo_SkipQ128(2)                                          18.80ns   53.20M     8.58ns  116.55M
SkipTo_SkipQ128(4_pm_1)                                     23.59ns   42.38M    11.43ns   87.50M
SkipTo_SkipQ128(16_pm_4)                                    36.04ns   27.74M    31.19ns   32.06M
SkipTo_SkipQ128(64_pm_16)                                   53.34ns   18.75M    43.88ns   22.79M
SkipTo_SkipQ128(256_pm_64)                                  62.27ns   16.06M    49.08ns   20.37M
SkipTo_SkipQ128(1024_pm_256)                                65.63ns   15.24M    52.24ns   19.14M
JumpTo_SkipQ128                                             65.89ns   15.18M    54.61ns   18.31M
----------------------------------------------------------------------------  ------------------
Encode_10                                                  111.94ns    8.93M   117.24ns    8.53M
Encode                                                       5.35ms   187.02     5.64ms   177.15
----------------------------------------------------------------------------  ------------------
Select64                                                     8.07ns  123.96M     8.04ns  124.35M
============================================================================  ==================

Test Plan:
fbconfig folly/experimental/test:eliasfano_test && fbmake runtests_opt

Reviewed By: philipp@fb.com

Subscribers: yfeldblum, fbcode-common-diffs@, chaoyc, search-fbcode-diffs@, unicorn-diffs@, trunkagent, folly-diffs@

FB internal diff: D1793554

Signature: t1:1793554:1423619344:1b078c0789639f317342ebcc77b11fe91859cd7b
parent 3f653ebe
......@@ -75,6 +75,7 @@ nobase_follyinclude_HEADERS = \
experimental/Singleton.h \
experimental/Singleton-inl.h \
experimental/TestUtil.h \
experimental/Select64.h \
FBString.h \
FBVector.h \
File.h \
......@@ -335,6 +336,7 @@ libfolly_la_SOURCES = \
experimental/io/FsUtil.cpp \
experimental/Singleton.cpp \
experimental/TestUtil.cpp \
experimental/Select64.cpp \
wangle/acceptor/Acceptor.cpp \
wangle/acceptor/ConnectionManager.cpp \
wangle/acceptor/LoadShedConfiguration.cpp \
......
......@@ -42,6 +42,7 @@
#include <folly/CpuId.h>
#include <folly/Likely.h>
#include <folly/Range.h>
#include <folly/experimental/Select64.h>
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error EliasFanoCoding.h requires little endianness
......@@ -405,18 +406,10 @@ class UpperBitsReader {
block_ = folly::loadUnaligned<block_t>(start_ + outer_);
}
// NOTE: Trying to skip half-block here didn't show any
// performance improvements.
// Skip to the n-th one in the block.
DCHECK_GT(n, 0);
// Kill n - 1 least significant 1-bits.
for (size_t i = 0; i < n - 1; ++i) {
block_ = Instructions::blsr(block_);
}
inner_ = Instructions::ctz(block_);
block_ = Instructions::blsr(block_);
inner_ = select64<Instructions>(block_, n - 1);
block_ &= (block_t(-1) << inner_) << 1;
return setValue();
}
......@@ -463,16 +456,13 @@ class UpperBitsReader {
block_ = folly::loadUnaligned<block_t>(start_ + outer_);
}
// Try to skip half-block.
constexpr size_t kBitsPerHalfBlock = 4 * sizeof(block_t);
constexpr block_t halfBlockMask = (block_t(1) << kBitsPerHalfBlock) - 1;
if ((cnt = Instructions::popcount(~block_ & halfBlockMask)) < skip) {
position_ += kBitsPerHalfBlock - cnt;
block_ &= ~halfBlockMask;
if (LIKELY(skip)) {
auto inner = select64<Instructions>(~block_, skip - 1);
position_ += inner - skip + 1;
block_ &= block_t(-1) << inner;
}
// Just skip until we see expected value.
while (next() < v) { }
next();
return value_;
}
......@@ -567,8 +557,13 @@ class EliasFanoReader : private boost::noncopyable {
progress_ += n;
if (LIKELY(progress_ <= list_.size)) {
if (LIKELY(n < kLinearScanThreshold)) {
for (size_t i = 0; i < n; ++i) upper_.next();
} else {
upper_.skip(n);
}
value_ = readLowerPart(progress_ - 1) |
(upper_.skip(n) << list_.numLowerBits);
(upper_.value() << list_.numLowerBits);
return true;
}
......@@ -587,7 +582,18 @@ class EliasFanoReader : private boost::noncopyable {
return false;
}
upper_.skipToNext(value >> list_.numLowerBits);
size_t upperValue = (value >> list_.numLowerBits);
size_t upperSkip = upperValue - upper_.value();
// The average density of ones in upper bits is 1/2.
// LIKELY here seems to make things worse, even for small skips.
if (upperSkip < 2 * kLinearScanThreshold) {
do {
upper_.next();
} while (UNLIKELY(upper_.value() < upperValue));
} else {
upper_.skipToNext(upperValue);
}
iterateTo(value);
return true;
}
......@@ -636,15 +642,17 @@ class EliasFanoReader : private boost::noncopyable {
}
void iterateTo(ValueType value) {
progress_ = upper_.position();
value_ = readLowerPart(progress_) | (upper_.value() << list_.numLowerBits);
++progress_;
while (value_ < value) {
value_ = readLowerPart(progress_) | (upper_.next() << list_.numLowerBits);
++progress_;
while (true) {
value_ = readLowerPart(upper_.position()) |
(upper_.value() << list_.numLowerBits);
if (LIKELY(value_ >= value)) break;
upper_.next();
}
progress_ = upper_.position() + 1;
}
constexpr static size_t kLinearScanThreshold = 8;
const EliasFanoCompressedList list_;
const ValueType lowerMask_;
detail::UpperBitsReader<Encoder, Instructions> upper_;
......
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/experimental/Select64.h>
#include <cstdint>
namespace folly { namespace detail {
const uint8_t kSelectInByte[2048] = {
8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0,
1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0,
2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0,
1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0,
1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0,
2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0,
1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0,
1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 8, 8, 8, 1,
8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1, 8, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2,
2, 1, 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1,
4, 3, 3, 1, 3, 2, 2, 1, 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4,
4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1,
3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 7, 7, 1, 7, 2,
2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3,
3, 1, 3, 2, 2, 1, 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1,
4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2,
2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 8, 8, 8, 8, 8, 8, 2,
8, 8, 8, 3, 8, 3, 3, 2, 8, 8, 8, 4, 8, 4, 4, 2, 8, 4, 4, 3, 4, 3, 3, 2, 8, 8,
8, 5, 8, 5, 5, 2, 8, 5, 5, 3, 5, 3, 3, 2, 8, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3,
4, 3, 3, 2, 8, 8, 8, 6, 8, 6, 6, 2, 8, 6, 6, 3, 6, 3, 3, 2, 8, 6, 6, 4, 6, 4,
4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 8, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2,
6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 7, 8, 7, 7, 2, 8, 7,
7, 3, 7, 3, 3, 2, 8, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2, 8, 7, 7, 5,
7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3,
3, 2, 8, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2,
6, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5,
5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 4, 8, 4, 4, 3, 8, 8, 8, 8, 8, 8,
8, 5, 8, 8, 8, 5, 8, 5, 5, 3, 8, 8, 8, 5, 8, 5, 5, 4, 8, 5, 5, 4, 5, 4, 4, 3,
8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 3, 8, 8, 8, 6, 8, 6, 6, 4, 8, 6,
6, 4, 6, 4, 4, 3, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 3, 8, 6, 6, 5,
6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7,
7, 3, 8, 8, 8, 7, 8, 7, 7, 4, 8, 7, 7, 4, 7, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 5,
8, 7, 7, 5, 7, 5, 5, 3, 8, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3, 8, 8,
8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 3, 8, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4,
6, 4, 4, 3, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5,
5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 4, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6,
6, 4, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 5, 8, 8, 8, 6, 8, 6, 6, 5,
8, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8,
8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7,
8, 7, 7, 5, 8, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 5, 7, 5, 5, 4, 8, 8, 8, 8, 8, 8,
8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 4,
8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6,
6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6,
8, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7,
8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 8,
8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6,
6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7
};
}}
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_EXPERIMENTAL_SELECT64_H
#define FOLLY_EXPERIMENTAL_SELECT64_H
#include <glog/logging.h>
namespace folly {
namespace detail {
extern const uint8_t kSelectInByte[2048];
}
/**
* Returns the position of the k-th 1 in the 64-bit word x.
* k is 0-based, so k=0 returns the position of the first 1.
*
* Uses the broadword selection algorithm by Vigna [1], improved by Gog
* and Petri [2] and Vigna [3].
*
* [1] Sebastiano Vigna. Broadword Implementation of Rank/Select
* Queries. WEA, 2008
*
* [2] Simon Gog, Matthias Petri. Optimized succinct data structures
* for massive data. Softw. Pract. Exper., 2014
*
* [3] Sebastiano Vigna. MG4J 5.2.1. http://mg4j.di.unimi.it/
*/
template <class Instructions>
inline uint64_t select64(uint64_t x, uint64_t k) {
DCHECK_LT(k, Instructions::popcount(x));
constexpr uint64_t kOnesStep4 = 0x1111111111111111ULL;
constexpr uint64_t kOnesStep8 = 0x0101010101010101ULL;
constexpr uint64_t kMSBsStep8 = 0x80ULL * kOnesStep8;
auto s = x;
s = s - ((s & 0xA * kOnesStep4) >> 1);
s = (s & 0x3 * kOnesStep4) + ((s >> 2) & 0x3 * kOnesStep4);
s = (s + (s >> 4)) & 0xF * kOnesStep8;
uint64_t byteSums = s * kOnesStep8;
uint64_t kStep8 = k * kOnesStep8;
uint64_t geqKStep8 = (((kStep8 | kMSBsStep8) - byteSums) & kMSBsStep8);
uint64_t place = Instructions::popcount(geqKStep8) * 8;
uint64_t byteRank = k - (((byteSums << 8) >> place) & uint64_t(0xFF));
return place + detail::kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)];
}
}
#endif
......@@ -80,9 +80,11 @@ void testNext(const std::vector<uint32_t>& data, const List& list) {
for (size_t i = 0; i < data.size(); ++i) {
EXPECT_TRUE(reader.next());
EXPECT_EQ(reader.value(), data[i]);
EXPECT_EQ(reader.position(), i);
}
EXPECT_FALSE(reader.next());
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_EQ(reader.position(), reader.size() - 1);
}
template <class Reader, class List>
......@@ -94,9 +96,11 @@ void testSkip(const std::vector<uint32_t>& data, const List& list,
for (size_t i = skipStep - 1; i < data.size(); i += skipStep) {
EXPECT_TRUE(reader.skip(skipStep));
EXPECT_EQ(reader.value(), data[i]);
EXPECT_EQ(reader.position(), i);
}
EXPECT_FALSE(reader.skip(skipStep));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_EQ(reader.position(), reader.size() - 1);
EXPECT_FALSE(reader.next());
}
......@@ -132,6 +136,7 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list,
value = reader.value() + delta;
}
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_EQ(reader.position(), reader.size() - 1);
EXPECT_FALSE(reader.next());
}
......@@ -148,6 +153,7 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
Reader reader(list);
EXPECT_FALSE(reader.skipTo(data.back() + 1));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_EQ(reader.position(), reader.size() - 1);
EXPECT_FALSE(reader.next());
}
}
......@@ -170,9 +176,11 @@ void testJump(const std::vector<uint32_t>& data, const List& list) {
for (auto i : is) {
EXPECT_TRUE(reader.jump(i + 1));
EXPECT_EQ(reader.value(), data[i]);
EXPECT_EQ(reader.position(), i);
}
EXPECT_FALSE(reader.jump(data.size() + 1));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_EQ(reader.position(), reader.size() - 1);
}
template <class Reader, class List>
......@@ -190,21 +198,26 @@ void testJumpTo(const std::vector<uint32_t>& data, const List& list) {
CHECK(it != data.end());
EXPECT_TRUE(reader.jumpTo(value));
EXPECT_EQ(reader.value(), *it);
EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
}
EXPECT_TRUE(reader.jumpTo(0));
EXPECT_EQ(reader.value(), 0);
EXPECT_EQ(reader.position(), -1);
if (data.front() > 0) {
EXPECT_TRUE(reader.jumpTo(1));
EXPECT_EQ(reader.value(), data.front());
EXPECT_EQ(reader.position(), 0);
}
EXPECT_TRUE(reader.jumpTo(data.back()));
EXPECT_EQ(reader.value(), data.back());
EXPECT_EQ(reader.position(), reader.size() - 1);
EXPECT_FALSE(reader.jumpTo(data.back() + 1));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_EQ(reader.position(), reader.size() - 1);
}
template <class Reader, class Encoder>
......@@ -242,46 +255,57 @@ void testAll(const std::vector<uint32_t>& data) {
}
template <class Reader, class List>
void bmNext(const List& list, const std::vector<uint32_t>& data,
size_t iters) {
void bmNext(const List& list, const std::vector<uint32_t>& data, size_t iters) {
if (data.empty()) {
return;
}
for (size_t i = 0, j; i < iters; ) {
Reader reader(list);
for (j = 0; reader.next(); ++j, ++i) {
CHECK_EQ(reader.value(), data[j]) << j;
Reader reader(list);
for (size_t i = 0; i < iters; ++i) {
if (LIKELY(reader.next())) {
folly::doNotOptimizeAway(reader.value());
} else {
reader.reset();
}
}
}
template <class Reader, class List>
void bmSkip(const List& list, const std::vector<uint32_t>& data,
size_t skip, size_t iters) {
if (skip >= data.size()) {
return;
}
for (size_t i = 0, j; i < iters; ) {
Reader reader(list);
for (j = skip - 1; j < data.size(); j += skip, ++i) {
reader.skip(skip);
CHECK_EQ(reader.value(), data[j]);
size_t logAvgSkip, size_t iters) {
size_t avg = (size_t(1) << logAvgSkip);
size_t base = avg - (avg >> 2);
size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
Reader reader(list);
for (size_t i = 0; i < iters; ++i) {
size_t skip = base + (i & mask);
if (LIKELY(reader.skip(skip))) {
folly::doNotOptimizeAway(reader.value());
} else {
reader.reset();
}
}
}
template <class Reader, class List>
void bmSkipTo(const List& list, const std::vector<uint32_t>& data,
size_t skip, size_t iters) {
if (skip >= data.size()) {
return;
}
for (size_t i = 0, j; i < iters; ) {
Reader reader(list);
for (j = 0; j < data.size(); j += skip, ++i) {
reader.skipTo(data[j]);
CHECK_EQ(reader.value(), data[j]);
size_t logAvgSkip, size_t iters) {
size_t avg = (size_t(1) << logAvgSkip);
size_t base = avg - (avg >> 2);
size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
Reader reader(list);
for (size_t i = 0, j = -1; i < iters; ++i) {
size_t skip = base + (i & mask);
j += skip;
if (j >= data.size()) {
reader.reset();
j = -1;
}
reader.skipTo(data[j]);
folly::doNotOptimizeAway(reader.value());
}
}
......@@ -292,12 +316,10 @@ void bmJump(const List& list, const std::vector<uint32_t>& data,
CHECK_EQ(data.size(), order.size());
Reader reader(list);
for (size_t i = 0; i < iters; ) {
for (size_t j : order) {
reader.jump(j + 1);
CHECK_EQ(reader.value(), data[j]);
++i;
}
for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
if (j == order.size()) j = 0;
reader.jump(order[j]);
folly::doNotOptimizeAway(reader.value());
}
}
......@@ -308,12 +330,10 @@ void bmJumpTo(const List& list, const std::vector<uint32_t>& data,
CHECK_EQ(data.size(), order.size());
Reader reader(list);
for (size_t i = 0; i < iters; ) {
for (size_t j : order) {
reader.jumpTo(data[j]);
CHECK_EQ(reader.value(), data[j]);
++i;
}
for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
if (j == order.size()) j = 0;
reader.jumpTo(data[order[j]]);
folly::doNotOptimizeAway(reader.value());
}
}
......
......@@ -15,15 +15,25 @@
*/
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>
#include <folly/Benchmark.h>
#include <folly/experimental/EliasFanoCoding.h>
#include <folly/experimental/Select64.h>
#include <folly/experimental/test/CodingTestUtils.h>
using namespace folly::compression;
#if defined(EF_TEST_NEHALEM)
#define EF_TEST_ARCH Nehalem
#elif defined(EF_TEST_HASWELL)
#define EF_TEST_ARCH Haswell
#else
#define EF_TEST_ARCH Default
#endif
template <size_t kVersion>
struct TestType {
static constexpr size_t Version = kVersion;
......@@ -42,7 +52,7 @@ class EliasFanoCodingTest : public ::testing::Test {
void doTestAll() {
typedef EliasFanoEncoder<
uint32_t, uint32_t, kSkipQuantum, kForwardQuantum, T::Version> Encoder;
typedef EliasFanoReader<Encoder> Reader;
typedef EliasFanoReader<Encoder, instructions::EF_TEST_ARCH> Reader;
testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
}
......@@ -94,11 +104,8 @@ void init() {
//data = loadList("/home/philipp/pl_test_dump.txt");
list = Encoder::encode(data.begin(), data.end());
order.clear();
order.reserve(data.size());
for (size_t i = 0; i < data.size(); ++i) {
order.push_back(i);
}
order.resize(data.size());
std::iota(order.begin(), order.end(), size_t());
std::shuffle(order.begin(), order.end(), gen);
encodeSmallData = generateRandomList(10, 100 * 1000, gen);
......@@ -111,50 +118,44 @@ void free() {
} // namespace bm
BENCHMARK(Next_1M) {
bmNext<bm::Reader>(bm::list, bm::data, bm::k1M);
BENCHMARK(Next, iters) {
bmNext<bm::Reader>(bm::list, bm::data, iters);
}
BENCHMARK(Skip1_ForwarQ128_1M) {
bmSkip<bm::Reader>(bm::list, bm::data, 1, bm::k1M);
size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) {
bmSkip<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
return iters;
}
BENCHMARK(Skip10_ForwarQ128_1M) {
bmSkip<bm::Reader>(bm::list, bm::data, 10, bm::k1M);
}
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10)
BENCHMARK(Skip100_ForwardQ128_1M) {
bmSkip<bm::Reader>(bm::list, bm::data, 100, bm::k1M);
}
BENCHMARK(Skip1000_ForwardQ128_1M) {
bmSkip<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
}
BENCHMARK(Jump_ForwardQ128_1M) {
bmJump<bm::Reader>(bm::list, bm::data, bm::order, bm::k1M);
BENCHMARK(Jump_ForwardQ128, iters) {
bmJump<bm::Reader>(bm::list, bm::data, bm::order, iters);
}
BENCHMARK_DRAW_LINE();
BENCHMARK(SkipTo1_SkipQ128_1M) {
bmSkipTo<bm::Reader>(bm::list, bm::data, 1, bm::k1M);
size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) {
bmSkipTo<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
return iters;
}
BENCHMARK(SkipTo10_SkipQ128_1M) {
bmSkipTo<bm::Reader>(bm::list, bm::data, 10, bm::k1M);
}
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10)
BENCHMARK(SkipTo100_SkipQ128_1M) {
bmSkipTo<bm::Reader>(bm::list, bm::data, 100, bm::k1M);
}
BENCHMARK(SkipTo1000_SkipQ128_1M) {
bmSkipTo<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
}
BENCHMARK(JumpTo_SkipQ128_1M) {
bmJumpTo<bm::Reader>(bm::list, bm::data, bm::order, bm::k1M);
BENCHMARK(JumpTo_SkipQ128, iters) {
bmJumpTo<bm::Reader>(bm::list, bm::data, bm::order, iters);
}
BENCHMARK_DRAW_LINE();
......@@ -165,33 +166,52 @@ BENCHMARK(Encode_10) {
list.free();
}
BENCHMARK(Encode_1M) {
BENCHMARK(Encode) {
auto list = bm::Encoder::encode(bm::encodeLargeData.begin(),
bm::encodeLargeData.end());
list.free();
}
#if 0
Intel(R) Xeon(R) CPU E5-2660 @ 2.7GHz (turbo on), using instructions::Nehalem.
BENCHMARK_DRAW_LINE();
BENCHMARK(Select64, iters) {
typedef instructions::EF_TEST_ARCH instr;
constexpr uint64_t kPrime = uint64_t(-59);
for (uint64_t x = kPrime, i = 0; i < iters; x *= kPrime, i += 1) {
size_t w = instr::popcount(x);
folly::doNotOptimizeAway(folly::select64<instr>(x, w - 1));
}
}
#if 0
Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz (turbo off),
using instructions::Haswell and GCC 4.9 with --bm_min_usec 100000.
============================================================================
folly/experimental/test/EliasFanoCodingTest.cpp relative time/iter iters/s
============================================================================
Next_1M 4.61ms 216.70
Skip1_ForwarQ128_1M 5.33ms 187.71
Skip10_ForwarQ128_1M 14.23ms 70.26
Skip100_ForwardQ128_1M 29.10ms 34.37
Skip1000_ForwardQ128_1M 21.15ms 47.28
Jump_ForwardQ128_1M 46.30ms 21.60
Next 2.52ns 397.28M
Skip_ForwardQ128(1) 3.92ns 255.28M
Skip_ForwardQ128(2) 5.08ns 197.04M
Skip_ForwardQ128(4_pm_1) 7.04ns 142.02M
Skip_ForwardQ128(16_pm_4) 19.68ns 50.82M
Skip_ForwardQ128(64_pm_16) 27.58ns 36.26M
Skip_ForwardQ128(256_pm_64) 32.49ns 30.78M
Skip_ForwardQ128(1024_pm_256) 33.39ns 29.95M
Jump_ForwardQ128 34.05ns 29.37M
----------------------------------------------------------------------------
SkipTo_SkipQ128(1) 4.42ns 226.49M
SkipTo_SkipQ128(2) 8.58ns 116.55M
SkipTo_SkipQ128(4_pm_1) 11.43ns 87.50M
SkipTo_SkipQ128(16_pm_4) 31.19ns 32.06M
SkipTo_SkipQ128(64_pm_16) 43.88ns 22.79M
SkipTo_SkipQ128(256_pm_64) 49.08ns 20.37M
SkipTo_SkipQ128(1024_pm_256) 52.24ns 19.14M
JumpTo_SkipQ128 54.61ns 18.31M
----------------------------------------------------------------------------
SkipTo1_SkipQ128_1M 12.03ms 83.15
SkipTo10_SkipQ128_1M 36.11ms 27.69
SkipTo100_SkipQ128_1M 42.91ms 23.30
SkipTo1000_SkipQ128_1M 36.92ms 27.08
JumpTo_SkipQ128_1M 78.51ms 12.74
Encode_10 117.24ns 8.53M
Encode 5.64ms 177.15
----------------------------------------------------------------------------
Encode_10 199.19ns 5.02M
Encode_1M 8.82ms 113.37
Select64 8.04ns 124.35M
============================================================================
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment