Commit 56768cc0 authored by Aaryaman Sagar, committed by Facebook Github Bot

fetch_set and fetch_reset for single bit setting and resetting

Summary:
lock bts and lock btr, at the various operand widths, turn out to be faster
than a single CAS instruction

fetch_set uses lock bts and fetch_reset uses lock btr when possible, falling
back to std::atomic::fetch_or and std::atomic::fetch_and with a mask otherwise
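
For illustration, a minimal sketch (not part of the diff) of what the two code
paths boil down to, using a hypothetical std::atomic<std::uint32_t>:

std::atomic<std::uint32_t> word{0};

// New API: compiles down to `lock bts` on x86_64 when possible.
bool previous = folly::atomic_fetch_set(word, 3);

// Portable fallback that the default implementation performs otherwise.
auto mask = std::uint32_t{1} << 3;
bool previous_fallback = (word.fetch_or(mask) & mask) != 0;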

Reviewed By: yfeldblum

Differential Revision: D9652759

fbshipit-source-id: e6d10fdb119a10e15e32131c7389f1d4203a0d91
parent 99ef4c99
CMakeLists.txt
@@ -674,6 +674,7 @@ if (BUILD_TESTS)
DIRECTORY synchronization/test/
TEST atomic_struct_test SOURCES AtomicStructTest.cpp
TEST small_locks_test SOURCES SmallLocksTest.cpp
TEST atomic_util_test SOURCES AtomicUtilTest.cpp
DIRECTORY test/
TEST ahm_int_stress_test SOURCES AHMIntStressTest.cpp
folly/synchronization/AtomicUtil-inl.h
/*
* Copyright 2004-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <folly/Portability.h>
#include <folly/Traits.h>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <stdexcept>
#include <tuple>
#include <type_traits>
namespace folly {
namespace detail {
// TODO: Remove the non-default implementations when both gcc and clang
// can recognize single bit set/reset patterns and compile them down to locked
// bts and btr instructions.
//
// At the time of writing, gcc 7 and greater can make this optimization and
// clang cannot - https://gcc.godbolt.org/z/Q83rxX
template <typename Atomic>
bool atomic_fetch_set_default(
Atomic& atomic,
std::size_t bit,
std::memory_order order) {
using Integer = decltype(atomic.load());
auto mask = Integer{0b1} << static_cast<Integer>(bit);
return (atomic.fetch_or(mask, order) & mask);
}
template <typename Atomic>
bool atomic_fetch_reset_default(
Atomic& atomic,
std::size_t bit,
std::memory_order order) {
using Integer = decltype(atomic.load());
auto mask = Integer{0b1} << static_cast<Integer>(bit);
return (atomic.fetch_and(~mask, order) & mask);
}
/**
* A simple trait to determine if the given type is an instantiation of
* std::atomic
*/
template <typename T>
constexpr auto is_atomic = false;
template <typename Integer>
constexpr auto is_atomic<std::atomic<Integer>> = true;
#if FOLLY_X64
#if _MSC_VER
template <typename Integer>
inline bool atomic_fetch_set_x86(
std::atomic<Integer>& atomic,
std::size_t bit,
std::memory_order order) {
static_assert(alignof(std::atomic<Integer>) == alignof(Integer), "");
static_assert(sizeof(std::atomic<Integer>) == sizeof(Integer), "");
assert(atomic.is_lock_free());
if /* constexpr */ (sizeof(Integer) == 4) {
return _interlockedbittestandset(
reinterpret_cast<volatile long*>(&atomic), static_cast<long>(bit));
} else if /* constexpr */ (sizeof(Integer) == 8) {
return _interlockedbittestandset64(
reinterpret_cast<volatile long long*>(&atomic),
static_cast<long long>(bit));
} else {
assert(sizeof(Integer) != 4 && sizeof(Integer) != 8);
return atomic_fetch_set_default(atomic, bit, order);
}
}
template <typename Atomic>
inline bool
atomic_fetch_set_x86(Atomic& atomic, std::size_t bit, std::memory_order order) {
static_assert(!std::is_same<Atomic, std::atomic<std::uint32_t>>{}, "");
static_assert(!std::is_same<Atomic, std::atomic<std::uint64_t>>{}, "");
return atomic_fetch_set_default(atomic, bit, order);
}
template <typename Integer>
inline bool atomic_fetch_reset_x86(
std::atomic<Integer>& atomic,
std::size_t bit,
std::memory_order order) {
static_assert(alignof(std::atomic<Integer>) == alignof(Integer), "");
static_assert(sizeof(std::atomic<Integer>) == sizeof(Integer), "");
assert(atomic.is_lock_free());
if /* constexpr */ (sizeof(Integer) == 4) {
return _interlockedbittestandreset(
reinterpret_cast<volatile long*>(&atomic), static_cast<long>(bit));
} else if /* constexpr */ (sizeof(Integer) == 8) {
return _interlockedbittestandreset64(
reinterpret_cast<volatile long long*>(&atomic),
static_cast<long long>(bit));
} else {
assert(sizeof(Integer) != 4 && sizeof(Integer) != 8);
return atomic_fetch_reset_default(atomic, bit, order);
}
}
template <typename Atomic>
inline bool
atomic_fetch_reset_x86(Atomic& atomic, std::size_t bit, std::memory_order mo) {
static_assert(!std::is_same<Atomic, std::atomic<std::uint32_t>>{}, "");
static_assert(!std::is_same<Atomic, std::atomic<std::uint64_t>>{}, "");
return atomic_fetch_reset_default(atomic, bit, mo);
}
#else
template <typename Integer>
inline bool atomic_fetch_set_x86(
std::atomic<Integer>& atomic,
std::size_t bit,
std::memory_order order) {
auto previous = false;
if /* constexpr */ (sizeof(Integer) == 2) {
auto pointer = reinterpret_cast<std::uint16_t*>(&atomic);
asm volatile("lock; btsw %1, (%2); setc %0"
: "=r"(previous)
: "ri"(static_cast<std::uint16_t>(bit)), "r"(pointer)
: "memory", "flags");
} else if /* constexpr */ (sizeof(Integer) == 4) {
auto pointer = reinterpret_cast<std::uint32_t*>(&atomic);
asm volatile("lock; btsl %1, (%2); setc %0"
: "=r"(previous)
: "ri"(static_cast<std::uint32_t>(bit)), "r"(pointer)
: "memory", "flags");
} else if /* constexpr */ (sizeof(Integer) == 8) {
auto pointer = reinterpret_cast<std::uint64_t*>(&atomic);
asm volatile("lock; btsq %1, (%2); setc %0"
: "=r"(previous)
: "ri"(static_cast<std::uint64_t>(bit)), "r"(pointer)
: "memory", "flags");
} else {
assert(sizeof(Integer) == 1);
return atomic_fetch_set_default(atomic, bit, order);
}
return previous;
}
template <typename Atomic>
inline bool
atomic_fetch_set_x86(Atomic& atomic, std::size_t bit, std::memory_order order) {
static_assert(!is_atomic<Atomic>, "");
return atomic_fetch_set_default(atomic, bit, order);
}
template <typename Integer>
inline bool atomic_fetch_reset_x86(
std::atomic<Integer>& atomic,
std::size_t bit,
std::memory_order order) {
auto previous = false;
if /* constexpr */ (sizeof(Integer) == 2) {
auto pointer = reinterpret_cast<std::uint16_t*>(&atomic);
asm volatile("lock; btrw %1, (%2); setc %0"
: "=r"(previous)
: "ri"(static_cast<std::uint16_t>(bit)), "r"(pointer)
: "memory", "flags");
} else if /* constexpr */ (sizeof(Integer) == 4) {
auto pointer = reinterpret_cast<std::uint32_t*>(&atomic);
asm volatile("lock; btrl %1, (%2); setc %0"
: "=r"(previous)
: "ri"(static_cast<std::uint32_t>(bit)), "r"(pointer)
: "memory", "flags");
} else if /* constexpr */ (sizeof(Integer) == 8) {
auto pointer = reinterpret_cast<std::uint64_t*>(&atomic);
asm volatile("lock; btrq %1, (%2); setc %0"
: "=r"(previous)
: "ri"(static_cast<std::uint64_t>(bit)), "r"(pointer)
: "memory", "flags");
} else {
assert(sizeof(Integer) == 1);
return atomic_fetch_reset_default(atomic, bit, order);
}
return previous;
}
template <typename Atomic>
bool atomic_fetch_reset_x86(
Atomic& atomic,
std::size_t bit,
std::memory_order order) {
static_assert(!is_atomic<Atomic>, "");
return atomic_fetch_reset_default(atomic, bit, order);
}
#endif
#else
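// Fallback stubs for non-x86_64 builds.  They are compiled but never
// executed: atomic_fetch_set() and atomic_fetch_reset() below check
// folly::kIsArchAmd64 before dispatching here.  Note that throwing from a
// noexcept function calls std::terminate().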
template <typename Atomic>
bool atomic_fetch_set_x86(Atomic&, std::size_t, std::memory_order) noexcept {
throw std::logic_error{"Incorrect function called"};
}
template <typename Atomic>
bool atomic_fetch_reset_x86(Atomic&, std::size_t, std::memory_order) noexcept {
throw std::logic_error{"Incorrect function called"};
}
#endif
} // namespace detail
template <typename Atomic>
bool atomic_fetch_set(Atomic& atomic, std::size_t bit, std::memory_order mo) {
using Integer = decltype(atomic.load());
static_assert(std::is_unsigned<Integer>{}, "");
static_assert(!std::is_const<Atomic>{}, "");
assert(bit < (sizeof(Integer) * 8));
if (folly::kIsArchAmd64) {
// do the optimized thing on x86 builds
return detail::atomic_fetch_set_x86(atomic, bit, mo);
} else {
// otherwise default to the default implementation using fetch_or()
return detail::atomic_fetch_set_default(atomic, bit, mo);
}
}
template <typename Atomic>
bool atomic_fetch_reset(Atomic& atomic, std::size_t bit, std::memory_order mo) {
using Integer = decltype(atomic.load());
static_assert(std::is_unsigned<Integer>{}, "");
static_assert(!std::is_const<Atomic>{}, "");
assert(bit < (sizeof(Integer) * 8));
if (folly::kIsArchAmd64) {
// do the optimized thing on x86 builds
return detail::atomic_fetch_reset_x86(atomic, bit, mo);
} else {
// otherwise default to the default implementation using fetch_and()
return detail::atomic_fetch_reset_default(atomic, bit, mo);
}
}
} // namespace folly
folly/synchronization/AtomicUtil.h
/*
* Copyright 2017-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <atomic>
#include <cstdint>
namespace folly {
/**
* Sets a bit at the given index in the binary representation of the integer
* to 1. Returns the previous value of the bit, so true if the bit was not
* changed, false otherwise
*
* On some architectures, using this is more efficient than the corresponding
* std::atomic::fetch_or() with a mask. For example to set the first (least
* significant) bit of an integer, you could do atomic.fetch_or(0b1)
*
* The efficiency win is so far only available on x86, and comes from the
* implementation using the x86 bts instruction when possible.
*
* When something other than a std::atomic integer is passed, the
* implementation assumes the type cannot use the specialized instructions
* and calls Atomic::fetch_or() with a mask instead
*/
template <typename Atomic>
bool atomic_fetch_set(
Atomic& atomic,
std::size_t bit,
std::memory_order order = std::memory_order_seq_cst);
/**
* Resets a bit at the given index in the binary representation of the integer
* to 0. Returns the previous value of the bit, so true if the bit was
* changed, false otherwise
*
* This follows the same underlying principle and implementation as
* fetch_set(), using the optimized implementation when possible and falling
* back to std::atomic::fetch_and() with a mask on architectures where the
* optimization is not available
*/
template <typename Atomic>
bool atomic_fetch_reset(
Atomic& atomic,
std::size_t bit,
std::memory_order order = std::memory_order_seq_cst);
} // namespace folly
#include <folly/synchronization/AtomicUtil-inl.h>
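
A minimal usage sketch for the two functions declared above; the variable
names and the bit index are illustrative only:

#include <folly/synchronization/AtomicUtil.h>

#include <atomic>
#include <cstdint>

int main() {
  std::atomic<std::uint8_t> bits{0};

  // Set bit 2; it was previously clear, so the call returns false.
  bool previous = folly::atomic_fetch_set(bits, 2); // previous == false
  previous = folly::atomic_fetch_set(bits, 2);      // previous == true, bits unchanged

  // Reset bit 2; it was previously set, so the call returns true.
  previous = folly::atomic_fetch_reset(bits, 2);    // previous == true
  previous = folly::atomic_fetch_reset(bits, 2, std::memory_order_relaxed); // false

  return previous ? 1 : 0;
}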
folly/synchronization/test/AtomicUtilTest.cpp
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/synchronization/AtomicUtil.h>
#include <folly/Benchmark.h>
#include <folly/Portability.h>
#include <folly/Utility.h>
#include <folly/portability/GTest.h>
namespace folly {
namespace {
auto default_fetch_set = [](auto&&... args) {
return atomic_fetch_set(args...);
};
auto default_fetch_reset = [](auto&&... args) {
return atomic_fetch_reset(args...);
};
template <typename Integer, typename FetchSet = decltype(default_fetch_set)>
void atomic_fetch_set_basic(FetchSet fetch_set = default_fetch_set) {
{
auto&& atomic = std::atomic<Integer>{0};
EXPECT_EQ(fetch_set(atomic, 0), false);
EXPECT_EQ(fetch_set(atomic, 1), false);
EXPECT_EQ(atomic.load(), 0b11);
EXPECT_EQ(fetch_set(atomic, 2), false);
EXPECT_EQ(atomic.load(), 0b111);
}
{
auto&& atomic = std::atomic<Integer>{0b1};
EXPECT_EQ(fetch_set(atomic, 0), true);
EXPECT_EQ(fetch_set(atomic, 0), true);
EXPECT_EQ(fetch_set(atomic, 1), false);
EXPECT_EQ(atomic.load(), 0b11);
EXPECT_EQ(fetch_set(atomic, 2), false);
EXPECT_EQ(atomic.load(), 0b111);
}
{
for (auto i = 0; i < 100000; ++i) {
// call makeUnpredictable() to ensure that the bit integer does not get
// optimized away. This is testing the feasibility of this code in
// situations where bit is not known at compile time and will likely force
// a register load
auto&& atomic = std::atomic<Integer>{0};
auto&& bit = 0;
folly::makeUnpredictable(bit);
EXPECT_EQ(fetch_set(atomic, bit), false);
EXPECT_EQ(fetch_set(atomic, bit + 1), false);
EXPECT_EQ(atomic.load(), 0b11);
EXPECT_EQ(fetch_set(atomic, bit + 2), false);
EXPECT_EQ(atomic.load(), 0b111);
}
}
}
template <typename Integer, typename FetchReset = decltype(default_fetch_reset)>
void atomic_fetch_reset_basic(FetchReset fetch_reset = default_fetch_reset) {
{
auto&& atomic = std::atomic<Integer>{0};
EXPECT_EQ(fetch_reset(atomic, 0), false);
EXPECT_EQ(fetch_reset(atomic, 1), false);
atomic.store(0b11);
EXPECT_EQ(fetch_reset(atomic, 0), true);
EXPECT_EQ(fetch_reset(atomic, 1), true);
EXPECT_EQ(atomic.load(), 0);
}
{
auto&& atomic = std::atomic<Integer>{0};
EXPECT_EQ(fetch_reset(atomic, 0), false);
EXPECT_EQ(fetch_reset(atomic, 1), false);
atomic.store(0b11);
EXPECT_EQ(fetch_reset(atomic, 1), true);
EXPECT_EQ(fetch_reset(atomic, 0), true);
EXPECT_EQ(atomic.load(), 0);
}
}
template <typename Integer>
class Atomic {
public:
Atomic(std::function<void()> onFetchOr, std::function<void()> onFetchAnd)
: onFetchOr_{std::move(onFetchOr)}, onFetchAnd_{std::move(onFetchAnd)} {}
Integer fetch_or(
Integer value,
std::memory_order = std::memory_order_seq_cst) {
onFetchOr_();
return exchange(integer_, integer_ | value);
}
Integer fetch_and(
Integer value,
std::memory_order = std::memory_order_seq_cst) {
onFetchAnd_();
return exchange(integer_, integer_ & value);
}
Integer load(std::memory_order = std::memory_order_seq_cst) {
return integer_;
}
std::function<void()> onFetchOr_;
std::function<void()> onFetchAnd_;
Integer integer_{0};
};
template <typename Integer, typename FetchSet = decltype(default_fetch_set)>
void atomic_fetch_set_non_std_atomic(FetchSet fetch_set = default_fetch_set) {
auto sets = 0;
auto resets = 0;
auto atomic = Atomic<Integer>{[&] { ++sets; }, [&] { ++resets; }};
fetch_set(atomic, 0);
EXPECT_EQ(sets, 1);
EXPECT_EQ(resets, 0);
EXPECT_EQ(atomic.integer_, 0b1);
fetch_set(atomic, 2);
EXPECT_EQ(sets, 2);
EXPECT_EQ(resets, 0);
EXPECT_EQ(atomic.integer_, 0b101);
}
template <typename Integer, typename F = decltype(default_fetch_reset)>
void atomic_fetch_reset_non_std_atomic(F fetch_reset = default_fetch_reset) {
auto sets = 0;
auto resets = 0;
auto atomic = Atomic<Integer>{[&] { ++sets; }, [&] { ++resets; }};
atomic.integer_ = 0b111;
fetch_reset(atomic, 0);
EXPECT_EQ(sets, 0);
EXPECT_EQ(resets, 1);
EXPECT_EQ(atomic.integer_, 0b110);
fetch_reset(atomic, 2);
EXPECT_EQ(sets, 0);
EXPECT_EQ(resets, 2);
EXPECT_EQ(atomic.integer_, 0b010);
}
} // namespace
class AtomicFetchSetTest : public ::testing::Test {};
class AtomicFetchResetTest : public ::testing::Test {};
TEST_F(AtomicFetchSetTest, Basic) {
atomic_fetch_set_basic<std::uint16_t>();
atomic_fetch_set_basic<std::uint32_t>();
atomic_fetch_set_basic<std::uint64_t>();
atomic_fetch_set_basic<std::uint8_t>();
}
TEST_F(AtomicFetchResetTest, Basic) {
atomic_fetch_reset_basic<std::uint16_t>();
atomic_fetch_reset_basic<std::uint32_t>();
atomic_fetch_reset_basic<std::uint64_t>();
atomic_fetch_reset_basic<std::uint8_t>();
}
TEST_F(AtomicFetchSetTest, EnsureFetchOrUsed) {
atomic_fetch_set_non_std_atomic<std::uint8_t>();
atomic_fetch_set_non_std_atomic<std::uint16_t>();
atomic_fetch_set_non_std_atomic<std::uint32_t>();
atomic_fetch_set_non_std_atomic<std::uint64_t>();
}
TEST_F(AtomicFetchResetTest, EnsureFetchAndUsed) {
atomic_fetch_reset_non_std_atomic<std::uint8_t>();
atomic_fetch_reset_non_std_atomic<std::uint16_t>();
atomic_fetch_reset_non_std_atomic<std::uint32_t>();
atomic_fetch_reset_non_std_atomic<std::uint64_t>();
}
TEST_F(AtomicFetchSetTest, FetchSetDefault) {
auto fetch_set = [](auto&&... args) {
return detail::atomic_fetch_set_default(args..., std::memory_order_seq_cst);
};
atomic_fetch_set_basic<std::uint16_t>(fetch_set);
atomic_fetch_set_basic<std::uint32_t>(fetch_set);
atomic_fetch_set_basic<std::uint64_t>(fetch_set);
atomic_fetch_set_basic<std::uint8_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint8_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint16_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint32_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint64_t>(fetch_set);
}
TEST_F(AtomicFetchSetTest, FetchResetDefault) {
auto fetch_reset = [](auto&&... args) {
return detail::atomic_fetch_reset_default(
args..., std::memory_order_seq_cst);
};
atomic_fetch_reset_basic<std::uint16_t>(fetch_reset);
atomic_fetch_reset_basic<std::uint32_t>(fetch_reset);
atomic_fetch_reset_basic<std::uint64_t>(fetch_reset);
atomic_fetch_reset_basic<std::uint8_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint8_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint16_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint32_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint64_t>(fetch_reset);
}
TEST_F(AtomicFetchSetTest, FetchSetX86) {
if (folly::kIsArchAmd64) {
auto fetch_set = [](auto&&... args) {
return detail::atomic_fetch_set_x86(args..., std::memory_order_seq_cst);
};
atomic_fetch_set_basic<std::uint16_t>(fetch_set);
atomic_fetch_set_basic<std::uint32_t>(fetch_set);
atomic_fetch_set_basic<std::uint64_t>(fetch_set);
atomic_fetch_set_basic<std::uint8_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint8_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint16_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint32_t>(fetch_set);
atomic_fetch_set_non_std_atomic<std::uint64_t>(fetch_set);
}
}
TEST_F(AtomicFetchResetTest, FetchResetX86) {
if (folly::kIsArchAmd64) {
auto fetch_reset = [](auto&&... args) {
return detail::atomic_fetch_reset_x86(args..., std::memory_order_seq_cst);
};
atomic_fetch_reset_basic<std::uint16_t>(fetch_reset);
atomic_fetch_reset_basic<std::uint32_t>(fetch_reset);
atomic_fetch_reset_basic<std::uint64_t>(fetch_reset);
atomic_fetch_reset_basic<std::uint8_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint8_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint16_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint32_t>(fetch_reset);
atomic_fetch_reset_non_std_atomic<std::uint64_t>(fetch_reset);
}
}
} // namespace folly
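
Not part of the commit: a rough standalone benchmark sketch, using
folly/Benchmark.h, of the kind of comparison the summary's claim rests on.
The benchmark names and the choice of a 64-bit word are arbitrary:

#include <folly/Benchmark.h>
#include <folly/synchronization/AtomicUtil.h>

#include <atomic>
#include <cstdint>

BENCHMARK(atomic_fetch_set_bts, iters) {
  auto word = std::atomic<std::uint64_t>{0};
  for (std::size_t i = 0; i < iters; ++i) {
    // Uses `lock bts` on x86_64 builds.
    folly::doNotOptimizeAway(folly::atomic_fetch_set(word, i % 64));
  }
}

BENCHMARK_RELATIVE(fetch_or_with_mask, iters) {
  auto word = std::atomic<std::uint64_t>{0};
  for (std::size_t i = 0; i < iters; ++i) {
    // Portable equivalent using fetch_or() with a mask.
    auto mask = std::uint64_t{1} << (i % 64);
    folly::doNotOptimizeAway((word.fetch_or(mask) & mask) != 0);
  }
}

int main() {
  folly::runBenchmarks();
  return 0;
}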