Commit 64eca5b8 authored by Nathan Bronson, committed by Facebook Github Bot

pack ItemIter using alignment bits rather than high bits

Summary:
Using the alignment bits to store the index for a packed
iterator is one of the steps toward better support of 32-bit platforms
(and pointer authentication in future platforms, such as ARMv8.3-A).
Previously this was complicated by a desire to make construction of
a packed ItemIter as cheap as possible.  The ItemPtr has a variable
number of bits available due to alignment, which may not be sufficient
to encode the index.  This diff observes that we can piggyback on the
chunk alignment to efficiently deduce the remaining bits of the index.
If sizeof(Item) gives us only 2 bits of alignment, for example, then we
can deduce 2 bits of the index from the Item*.
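
For illustration, assuming a hypothetical 12-byte Item (so exactly 2 alignment
bits are free), the recovery arithmetic works out as in this small standalone
sketch; all names and values here are illustrative, not part of the change:

#include <cassert>
#include <cstdint>
int main() {
  constexpr uintptr_t kSize = 12;     // hypothetical sizeof(Item)
  constexpr uintptr_t kAlignBits = 2; // trailing zero bits of 12
  constexpr uintptr_t kAlignMask = (uintptr_t{1} << kAlignBits) - 1;
  constexpr uintptr_t kModulus = 16 >> kAlignBits;              // 4
  constexpr uintptr_t kS = (kSize % 16) >> kAlignBits;          // 3
  constexpr uintptr_t kSizeInverse = (kS * kS * kS) % kModulus; // 3^(M-1) % M == 3
  uintptr_t itemArray = 0x1000; // stand-in for a 16-byte-aligned item array
  for (uintptr_t index = 0; index < 14; ++index) {
    uintptr_t item = itemArray + index * kSize;
    uintptr_t raw = item | (index >> (4 - kAlignBits)); // pack: 2 bits stored
    uintptr_t stored = (raw & kAlignMask) << (4 - kAlignBits);
    uintptr_t deduced = ((raw >> kAlignBits) * kSizeInverse) & (kModulus - 1);
    assert((raw & ~kAlignMask) == item); // pointer recovered
    assert((stored | deduced) == index); // index recovered
  }
}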

Differential Revision: D8452991

fbshipit-source-id: 40dfd9957da3733090bb947646c553620cc7d33d
parent 9d0b6c65
/*
* Copyright 2017-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <type_traits>
#include <folly/Portability.h>
#include <folly/lang/SafeAssert.h>
namespace folly {
namespace f14 {
namespace detail {
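// Rebinds a (possibly fancy) pointer type so that it points to the
// non-const version of its element type.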
template <typename Ptr>
using NonConstPtr = typename std::pointer_traits<Ptr>::template rebind<
std::remove_const_t<typename std::pointer_traits<Ptr>::element_type>>;
//////// TaggedPtr
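// Generic form: keeps the pointer and the 8-bit tag in separate fields, so
// it works for any (possibly fancy) pointer type.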
template <typename Ptr>
class TaggedPtr {
public:
TaggedPtr() = default;
TaggedPtr(TaggedPtr const&) = default;
TaggedPtr(TaggedPtr&&) = default;
TaggedPtr& operator=(TaggedPtr const&) = default;
TaggedPtr& operator=(TaggedPtr&&) = default;
TaggedPtr(Ptr p, uint8_t e) noexcept : ptr_{p}, extra_{e} {}
/* implicit */ TaggedPtr(std::nullptr_t) noexcept {}
TaggedPtr& operator=(std::nullptr_t) noexcept {
ptr_ = nullptr;
extra_ = 0;
return *this;
}
typename std::pointer_traits<Ptr>::element_type& operator*() const noexcept {
return *ptr_;
}
typename std::pointer_traits<Ptr>::element_type* operator->() const noexcept {
return std::addressof(*ptr_);
}
Ptr ptr() const {
return ptr_;
}
void setPtr(Ptr p) {
ptr_ = p;
}
uint8_t extra() const {
return extra_;
}
void setExtra(uint8_t e) {
extra_ = e;
}
bool operator==(TaggedPtr const& rhs) const noexcept {
return ptr_ == rhs.ptr_ && extra_ == rhs.extra_;
}
bool operator!=(TaggedPtr const& rhs) const noexcept {
return !(*this == rhs);
}
bool operator<(TaggedPtr const& rhs) const noexcept {
return ptr_ != rhs.ptr_ ? ptr_ < rhs.ptr_ : extra_ < rhs.extra_;
}
bool operator==(std::nullptr_t) const noexcept {
return ptr_ == nullptr;
}
bool operator!=(std::nullptr_t) const noexcept {
return !(*this == nullptr);
}
private:
Ptr ptr_{};
uint8_t extra_{};
};
#if FOLLY_X64 || FOLLY_AARCH64
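// Raw-pointer specialization for 64-bit platforms: the pointer is shifted
// left by 8 bits and the tag occupies the low byte, so the whole TaggedPtr
// fits in a single uintptr_t.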
template <typename T>
class TaggedPtr<T*> {
public:
TaggedPtr() = default;
TaggedPtr(TaggedPtr const&) = default;
TaggedPtr(TaggedPtr&&) = default;
TaggedPtr& operator=(TaggedPtr const&) = default;
TaggedPtr& operator=(TaggedPtr&&) = default;
TaggedPtr(T* p, uint8_t e) noexcept
: raw_{(reinterpret_cast<uintptr_t>(p) << 8) | e} {
FOLLY_SAFE_DCHECK(ptr() == p, "");
}
/* implicit */ TaggedPtr(std::nullptr_t) noexcept : raw_{0} {}
TaggedPtr& operator=(std::nullptr_t) noexcept {
raw_ = 0;
return *this;
}
T& operator*() const noexcept {
return *ptr();
}
T* operator->() const noexcept {
return std::addressof(*ptr());
}
T* ptr() const {
return reinterpret_cast<T*>(raw_ >> 8);
}
void setPtr(T* p) {
*this = TaggedPtr{p, extra()};
FOLLY_SAFE_DCHECK(ptr() == p, "");
}
uint8_t extra() const {
return static_cast<uint8_t>(raw_);
}
void setExtra(uint8_t e) {
*this = TaggedPtr{ptr(), e};
}
bool operator==(TaggedPtr const& rhs) const {
return raw_ == rhs.raw_;
}
bool operator!=(TaggedPtr const& rhs) const {
return !(*this == rhs);
}
bool operator<(TaggedPtr const& rhs) const noexcept {
return raw_ < rhs.raw_;
}
bool operator==(std::nullptr_t) const noexcept {
return raw_ == 0;
}
bool operator!=(std::nullptr_t) const noexcept {
return !(*this == nullptr);
}
private:
// TODO: verify no high-bit extension needed on aarch64
uintptr_t raw_;
};
#endif // FOLLY_X64 || FOLLY_AARCH64
} // namespace detail
} // namespace f14
} // namespace folly
@@ -33,6 +33,10 @@ namespace folly {
namespace f14 {
namespace detail {
template <typename Ptr>
using NonConstPtr = typename std::pointer_traits<Ptr>::template rebind<
std::remove_const_t<typename std::pointer_traits<Ptr>::element_type>>;
template <typename KeyType, typename MappedType>
using MapValueType = std::pair<KeyType const, MappedType>;
@@ -45,7 +45,6 @@
#include <folly/container/detail/F14Defaults.h>
#include <folly/container/detail/F14IntrinsicsAvailability.h>
#include <folly/container/detail/F14Memory.h>
#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
#if FOLLY_AARCH64
@@ -405,7 +404,7 @@ class FirstEmptyInMask {
};
template <typename ItemType>
-struct alignas(max_align_t) F14Chunk {
+struct alignas(16) F14Chunk {
using Item = ItemType;
// Assuming alignof(max_align_t) == 16 (and assuming alignof(Item) >=
@@ -425,8 +424,9 @@ struct alignas(max_align_t) F14Chunk {
static constexpr MaskType kFullMask = FullMask<kCapacity>::value;
-// Non-empty tags have their top bit set
-std::array<uint8_t, kCapacity> tags_;
+// Non-empty tags have their top bit set. The tags_ array may be bigger
+// than kCapacity in order to keep the first Item aligned.
+std::array<uint8_t, 14> tags_;
// Bits 0..3 record the actual capacity of the chunk if this is chunk
// zero, or hold 0000 for other chunks. Bits 4-7 are a 4-bit counter
@@ -628,6 +628,150 @@ struct alignas(max_align_t) F14Chunk {
////////////////
// PackedChunkItemPtr points to an Item in an F14Chunk, allowing both the
// Item& and its index to be recovered. It sorts by the address of the
// item, and it only works for items that are in a properly-aligned chunk.
// generic form, not actually packed
template <typename Ptr>
class PackedChunkItemPtr {
public:
PackedChunkItemPtr(Ptr p, std::size_t i) noexcept
: ptr_{p}, index_{static_cast<unsigned>(i)} {
FOLLY_SAFE_DCHECK(ptr_ != nullptr || index_ == 0, "");
}
Ptr ptr() const {
return ptr_;
}
std::size_t index() const {
return index_;
}
bool operator<(PackedChunkItemPtr const& rhs) const {
FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
return ptr_ < rhs.ptr_;
}
bool operator==(PackedChunkItemPtr const& rhs) const {
FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
return ptr_ == rhs.ptr_;
}
bool operator!=(PackedChunkItemPtr const& rhs) const {
return !(*this == rhs);
}
private:
Ptr ptr_;
unsigned index_;
};
// Bare pointer form, packed into a uintptr_t. Uses only bits wasted by
// alignment, so it works on 32-bit and 64-bit platforms
template <typename T>
class PackedChunkItemPtr<T*> {
static_assert((alignof(F14Chunk<T>) % 16) == 0, "");
// Chunks are 16-byte aligned, so we can maintain a packed pointer to a
// chunk item by packing the 4-bit item index into the least significant
// bits of a pointer to the chunk itself. This makes ItemIter::pack
// more expensive, however, since it has to compute the chunk address.
//
// Chunk items have varying alignment constraints, so it would seem
// to be that we can't do a similar trick while using only bit masking
// operations on the Item* itself. It happens to be, however, that if
// sizeof(Item) is not a multiple of 16 then we can recover a portion
// of the index bits from the knowledge that the Item-s are stored in
// an array that is itself 16-byte aligned.
//
// If kAlignBits is the number of trailing zero bits in sizeof(Item)
// (up to 4), then we can borrow those bits to store kAlignBits of the
// index directly. We can recover (4 - kAlignBits) bits of the index
// from the item pointer itself, by defining/observing that
//
// A = kAlignBits (A <= 4)
//
// S = (sizeof(Item) % 16) >> A (shifted-away bits are all zero)
//
// R = (itemPtr % 16) >> A (shifted-away bits are all zero)
//
// M = 16 >> A
//
// itemPtr % 16 = (index * sizeof(Item)) % 16
//
// (R * 2^A) % 16 = (index * (sizeof(Item) % 16)) % 16
//
// (R * 2^A) % 16 = (index * 2^A * S) % 16
//
// R % M = (index * S) % M
//
// S is relatively prime with M, so a multiplicative inverse is easy
// to compute
//
// Sinv = S^(M - 1) % M
//
// (R * Sinv) % M = index % M
//
// This lets us recover the bottom bits of the index. When sizeof(T)
// is 8-byte aligned kSizeInverse will always be 1. When sizeof(T)
// is 4-byte aligned kSizeInverse will be either 1 or 3.
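//
// Worked example with an assumed sizeof(Item) of 24: A == 3,
// S == (24 % 16) >> 3 == 1, M == 16 >> 3 == 2, and Sinv == 1.  For
// index == 5 the item begins at offset 5 * 24 == 120 within the 16-byte
// aligned array, so R == (120 % 16) >> 3 == 1 and
// (R * Sinv) % M == 1 == index % M, recovering the low bit of the index.
// The remaining three bits (5 >> 1 == 2) are stored directly in the
// pointer's alignment bits.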
// returns pow(x, y) % m
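// e.g. powerMod(3, 3, 4) == 27 % 4 == 3, the multiplicative inverse of 3 mod 4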
static constexpr uintptr_t powerMod(uintptr_t x, uintptr_t y, uintptr_t m) {
return y == 0 ? 1 : (x * powerMod(x, y - 1, m)) % m;
}
static constexpr uintptr_t kIndexBits = 4;
static constexpr uintptr_t kIndexMask = (uintptr_t{1} << kIndexBits) - 1;
static constexpr uintptr_t kAlignBits = (sizeof(T) % 16) == 0
? 4
: (sizeof(T) % 8) == 0
? 3
: (sizeof(T) % 4) == 0 ? 2 : (sizeof(T) % 2) == 0 ? 1 : 0;
static constexpr uintptr_t kAlignMask = (uintptr_t{1} << kAlignBits) - 1;
static constexpr uintptr_t kModulus = uintptr_t{1}
<< (kIndexBits - kAlignBits);
static constexpr uintptr_t kSizeInverse =
powerMod(sizeof(T) >> kAlignBits, kModulus - 1, kModulus);
public:
PackedChunkItemPtr(T* p, std::size_t i) noexcept {
uintptr_t encoded = i >> (kIndexBits - kAlignBits);
folly::assume((encoded & ~kAlignMask) == 0);
raw_ = reinterpret_cast<uintptr_t>(p) | encoded;
FOLLY_SAFE_DCHECK(p == ptr(), "");
FOLLY_SAFE_DCHECK(i == index(), "");
}
T* ptr() const {
return reinterpret_cast<T*>(raw_ & ~kAlignMask);
}
std::size_t index() const {
auto encoded = (raw_ & kAlignMask) << (kIndexBits - kAlignBits);
auto deduced =
((raw_ >> kAlignBits) * kSizeInverse) & (kIndexMask >> kAlignBits);
return encoded | deduced;
}
bool operator<(PackedChunkItemPtr const& rhs) const {
return raw_ < rhs.raw_;
}
bool operator==(PackedChunkItemPtr const& rhs) const {
return raw_ == rhs.raw_;
}
bool operator!=(PackedChunkItemPtr const& rhs) const {
return !(*this == rhs);
}
private:
uintptr_t raw_;
};
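A minimal round-trip sketch of the packed form, assuming a hypothetical
8-byte DemoItem and a standalone 16-byte-aligned array (in the container
itself the items live inside an F14Chunk):

// Illustrative only; DemoItem and demoItems are hypothetical names.
struct DemoItem {
  uint64_t payload; // sizeof == 8, so kAlignBits == 3 and 1 bit is deduced
};
alignas(16) DemoItem demoItems[14];
inline void packedChunkItemPtrRoundTrip() {
  PackedChunkItemPtr<DemoItem*> p{&demoItems[5], 5};
  FOLLY_SAFE_DCHECK(p.ptr() == &demoItems[5], "");
  FOLLY_SAFE_DCHECK(p.index() == 5, "");
}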
template <typename ChunkPtr>
class F14ItemIter {
private:
@@ -639,7 +783,7 @@ class F14ItemIter {
using ItemConstPtr =
typename std::pointer_traits<ChunkPtr>::template rebind<Item const>;
-using Packed = TaggedPtr<ItemPtr>;
+using Packed = PackedChunkItemPtr<ItemPtr>;
//// PUBLIC
@@ -648,7 +792,7 @@ class F14ItemIter {
// default copy and move constructors and assignment operators are correct
explicit F14ItemIter(Packed const& packed)
-: itemPtr_{packed.ptr()}, index_{packed.extra()} {}
+: itemPtr_{packed.ptr()}, index_{packed.index()} {}
F14ItemIter(ChunkPtr chunk, std::size_t index)
: itemPtr_{std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index))},
@@ -1004,6 +1004,45 @@ TEST(F14FastSet, visitContiguousRanges) {
runVisitContiguousRangesTest<F14FastSet<int>>();
}
namespace {
struct CharArrayHasher {
template <std::size_t N>
std::size_t operator()(std::array<char, N> const& value) const {
return folly::Hash{}(StringPiece{value.begin(), value.end()});
}
};
template <
template <typename, typename, typename, typename> class S,
std::size_t N>
struct RunAllValueSizeTests {
void operator()() const {
using Key = std::array<char, N>;
static_assert(sizeof(Key) == N, "");
S<Key, CharArrayHasher, std::equal_to<Key>, std::allocator<Key>> set;
for (int i = 0; i < 100; ++i) {
Key key{static_cast<char>(i)};
set.insert(key);
}
while (!set.empty()) {
set.erase(set.begin());
}
RunAllValueSizeTests<S, N - 1>{}();
}
};
template <template <typename, typename, typename, typename> class S>
struct RunAllValueSizeTests<S, 0> {
void operator()() const {}
};
} // namespace
TEST(F14ValueSet, valueSize) {
RunAllValueSizeTests<F14ValueSet, 32>{}();
}
///////////////////////////////////
#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
///////////////////////////////////