Commit aa68792c authored by Yedidya Feldblum, committed by Facebook GitHub Bot

atomic_fetch_flip

Summary:
The flip operation, like the set and reset operations, is optimizable on x86 architectures: it maps to the `btc` instruction, just as set and reset map to `bts` and `btr`.

The optimization is applied for GNU-like compilers via inline assembly. However, MSVC has no interlocked intrinsic for `btc`, so the optimization is not applied there.
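For context, a minimal usage sketch of the new API (illustrative only; the values and the `main` wrapper are not part of the commit):

  #include <atomic>
  #include <cassert>
  #include <cstdint>

  #include <folly/synchronization/AtomicUtil.h>

  int main() {
    std::atomic<std::uint64_t> word{0b100};

    // Flip bit 2 and return its previous value; on x86-64 under GNU-like
    // compilers this lowers to a single `lock btc` instruction.
    bool was_set = folly::atomic_fetch_flip(word, 2);
    assert(was_set);
    assert(word.load() == 0);

    // The portable equivalent used by the fallback: fetch_xor with a mask.
    auto previous = word.fetch_xor(std::uint64_t{1} << 2);
    assert((previous & (std::uint64_t{1} << 2)) == 0);
    return 0;
  }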

Reviewed By: luciang

Differential Revision: D32049844

fbshipit-source-id: 5f60f8c4c534fb0ee41d76c5d766f96f7ff94892
parent 49bf3d46
@@ -127,6 +127,14 @@ bool atomic_fetch_reset_fallback(
  return (atomic.fetch_and(Integer(~mask), order) & mask);
}

template <typename Atomic>
bool atomic_fetch_flip_fallback(
    Atomic& atomic, std::size_t bit, std::memory_order order) {
  using Integer = decltype(atomic.load());
  auto mask = Integer(Integer{0b1} << bit);
  return (atomic.fetch_xor(mask, order) & mask);
}

/**
 * A simple trait to determine if the given type is an instantiation of
 * std::atomic
@@ -196,6 +204,14 @@ inline bool atomic_fetch_reset_native(
  return atomic_fetch_reset_fallback(atomic, bit, mo);
}

template <typename Atomic>
inline bool atomic_fetch_flip_native(
    Atomic& atomic, std::size_t bit, std::memory_order mo) {
  static_assert(!std::is_same<Atomic, std::atomic<std::uint32_t>>{}, "");
  static_assert(!std::is_same<Atomic, std::atomic<std::uint64_t>>{}, "");
  return atomic_fetch_flip_fallback(atomic, bit, mo);
}
#else
template <typename Integer>
@@ -274,6 +290,44 @@ bool atomic_fetch_reset_native(
  return atomic_fetch_reset_fallback(atomic, bit, order);
}

template <typename Integer>
inline bool atomic_fetch_flip_native(
    std::atomic<Integer>& atomic, std::size_t bit, std::memory_order order) {
  auto previous = false;

  if /* constexpr */ (sizeof(Integer) == 2) {
    auto pointer = reinterpret_cast<std::uint16_t*>(&atomic);
    asm volatile("lock; btcw %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint16_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else if /* constexpr */ (sizeof(Integer) == 4) {
    auto pointer = reinterpret_cast<std::uint32_t*>(&atomic);
    asm volatile("lock; btcl %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint32_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else if /* constexpr */ (sizeof(Integer) == 8) {
    auto pointer = reinterpret_cast<std::uint64_t*>(&atomic);
    asm volatile("lock; btcq %1, (%2); setc %0"
                 : "=r"(previous)
                 : "ri"(static_cast<std::uint64_t>(bit)), "r"(pointer)
                 : "memory", "flags");
  } else {
    assert(sizeof(Integer) == 1);
    return atomic_fetch_flip_fallback(atomic, bit, order);
  }

  return previous;
}

template <typename Atomic>
bool atomic_fetch_flip_native(
    Atomic& atomic, std::size_t bit, std::memory_order order) {
  static_assert(!is_atomic<Atomic>, "");
  return atomic_fetch_flip_fallback(atomic, bit, order);
}
#endif
#else
@@ -289,6 +343,12 @@ bool atomic_fetch_reset_native(
  // This should never be called on non x86_64 platforms.
  std::terminate();
}

template <typename Atomic>
bool atomic_fetch_flip_native(
    Atomic&, std::size_t, std::memory_order) noexcept {
  // This should never be called on non x86_64 platforms.
  std::terminate();
}
#endif
@@ -328,4 +388,21 @@ bool atomic_fetch_reset(Atomic& atomic, std::size_t bit, std::memory_order mo) {
  }
}

template <typename Atomic>
bool atomic_fetch_flip(Atomic& atomic, std::size_t bit, std::memory_order mo) {
  using Integer = decltype(atomic.load());
  static_assert(std::is_unsigned<Integer>{}, "");
  static_assert(!std::is_const<Atomic>{}, "");
  assert(bit < (sizeof(Integer) * 8));

  // do the optimized thing on x86 builds; some versions of TSAN do not
  // properly instrument the inline assembly, so avoid it when TSAN is enabled
  if (folly::kIsArchAmd64 && !folly::kIsSanitizeThread) {
    return detail::atomic_fetch_flip_native(atomic, bit, mo);
  } else {
    // otherwise fall back to the portable implementation using fetch_xor()
    return detail::atomic_fetch_flip_fallback(atomic, bit, mo);
  }
}
} // namespace folly
@@ -89,6 +89,24 @@ bool atomic_fetch_reset(
    std::size_t bit,
    std::memory_order order = std::memory_order_seq_cst);

// atomic_fetch_flip
//
// Flips the bit at the given index in the binary representation of the
// integer from 1 to 0 or from 0 to 1. Returns the previous value of the bit.
//
// Equivalent to Atomic::fetch_xor with a mask. For example, if the bit
// argument to this function is 1, the mask passed to the corresponding
// Atomic::fetch_xor would be 0b10.
//
// Uses an optimized implementation when available, otherwise falling back to
// Atomic::fetch_xor with mask. The optimization is currently available for
// std::atomic on x86, using the btc instruction.
template <typename Atomic>
bool atomic_fetch_flip(
    Atomic& atomic,
    std::size_t bit,
    std::memory_order order = std::memory_order_seq_cst);
} // namespace folly
#include <folly/synchronization/AtomicUtil-inl.h>
@@ -110,6 +110,11 @@ FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX(reset, 8, 3)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_VAR(reset, 16)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX(reset, 16, 3)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX(reset, 16, 11)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_VAR(flip, 8)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX(flip, 8, 3)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_VAR(flip, 16)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX(flip, 16, 3)
FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX(flip, 16, 11)
#undef FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX
#undef FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_VAR
@@ -117,8 +122,10 @@ FOLLY_ATOMIC_FETCH_BIT_OP_CHECK_FIX(reset, 16, 11)
namespace atomic_util_access {
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_set, folly);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_reset, folly);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_flip, folly);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_set_fallback, folly::detail);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_reset_fallback, folly::detail);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_flip_fallback, folly::detail);
} // namespace atomic_util_access
namespace {
@@ -108,8 +108,10 @@ TEST_F(AtomicCompareExchangeSuccTest, examples) {
namespace access {
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_set, folly);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_reset, folly);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_flip, folly);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_set_fallback, folly::detail);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_reset_fallback, folly::detail);
FOLLY_CREATE_FREE_INVOKER_SUITE(atomic_fetch_flip_fallback, folly::detail);
} // namespace access
namespace {
@@ -186,6 +188,29 @@ void atomic_fetch_reset_basic(Op fetch_reset = {}) {
  }
}

template <typename Integer, typename Op = access::atomic_fetch_flip_fn>
void atomic_fetch_flip_basic(Op fetch_flip = {}) {
  {
    auto&& atomic = std::atomic<Integer>{0};
    EXPECT_EQ(fetch_flip(atomic, 0), false);
    EXPECT_EQ(fetch_flip(atomic, 1), false);
    atomic.store(0b11);
    EXPECT_EQ(fetch_flip(atomic, 0), true);
    EXPECT_EQ(fetch_flip(atomic, 1), true);
    EXPECT_EQ(atomic.load(), 0);
  }
  {
    auto&& atomic = std::atomic<Integer>{0};
    EXPECT_EQ(fetch_flip(atomic, 0), false);
    EXPECT_EQ(fetch_flip(atomic, 1), false);
    atomic.store(0b10);
    EXPECT_EQ(fetch_flip(atomic, 1), true);
    EXPECT_EQ(fetch_flip(atomic, 0), false);
    EXPECT_EQ(atomic.load(), 0b01);
  }
}

template <typename Integer>
class Atomic {
 public:
@@ -199,6 +224,11 @@ class Atomic {
    ++counts.reset;
    return std::exchange(integer_, integer_ & value);
  }

  Integer fetch_xor(
      Integer value, std::memory_order = std::memory_order_seq_cst) {
    ++counts.flip;
    return std::exchange(integer_, integer_ ^ value);
  }

  Integer load(std::memory_order = std::memory_order_seq_cst) {
    return integer_;
@@ -209,6 +239,7 @@
  struct counts_ {
    size_t set{0};
    size_t reset{0};
    size_t flip{0};
  };
  counts_ counts;
};
@@ -218,15 +249,18 @@ void atomic_fetch_set_non_std_atomic(Op fetch_set = {}) {
  auto atomic = Atomic<Integer>{};
  auto& sets = atomic.counts.set;
  auto& resets = atomic.counts.reset;
  auto& flips = atomic.counts.flip;

  fetch_set(atomic, 0);
  EXPECT_EQ(sets, 1);
  EXPECT_EQ(resets, 0);
  EXPECT_EQ(flips, 0);
  EXPECT_EQ(atomic.integer_, 0b1);

  fetch_set(atomic, 2);
  EXPECT_EQ(sets, 2);
  EXPECT_EQ(resets, 0);
  EXPECT_EQ(flips, 0);
  EXPECT_EQ(atomic.integer_, 0b101);
}
@@ -235,22 +269,47 @@ void atomic_fetch_reset_non_std_atomic(Op fetch_reset = {}) {
  auto atomic = Atomic<Integer>{};
  auto& sets = atomic.counts.set;
  auto& resets = atomic.counts.reset;
  auto& flips = atomic.counts.flip;

  atomic.integer_ = 0b111;

  fetch_reset(atomic, 0);
  EXPECT_EQ(sets, 0);
  EXPECT_EQ(resets, 1);
  EXPECT_EQ(flips, 0);
  EXPECT_EQ(atomic.integer_, 0b110);

  fetch_reset(atomic, 2);
  EXPECT_EQ(sets, 0);
  EXPECT_EQ(resets, 2);
  EXPECT_EQ(flips, 0);
  EXPECT_EQ(atomic.integer_, 0b010);
}

template <typename Integer, typename Op = access::atomic_fetch_flip_fn>
void atomic_fetch_flip_non_std_atomic(Op fetch_flip = {}) {
  auto atomic = Atomic<Integer>{};
  auto& sets = atomic.counts.set;
  auto& resets = atomic.counts.reset;
  auto& flips = atomic.counts.flip;

  atomic.integer_ = 0b110;

  fetch_flip(atomic, 0);
  EXPECT_EQ(sets, 0);
  EXPECT_EQ(resets, 0);
  EXPECT_EQ(flips, 1);
  EXPECT_EQ(atomic.integer_, 0b111);

  fetch_flip(atomic, 2);
  EXPECT_EQ(sets, 0);
  EXPECT_EQ(resets, 0);
  EXPECT_EQ(flips, 2);
  EXPECT_EQ(atomic.integer_, 0b011);
}
} // namespace
class AtomicFetchSetTest : public ::testing::Test {};
class AtomicFetchResetTest : public ::testing::Test {};
class AtomicFetchFlipTest : public ::testing::Test {};
TEST_F(AtomicFetchSetTest, Basic) {
  atomic_fetch_set_basic<std::uint16_t>();

@@ -266,6 +325,13 @@ TEST_F(AtomicFetchResetTest, Basic) {
  atomic_fetch_reset_basic<std::uint8_t>();
}
TEST_F(AtomicFetchFlipTest, Basic) {
  atomic_fetch_flip_basic<std::uint16_t>();
  atomic_fetch_flip_basic<std::uint32_t>();
  atomic_fetch_flip_basic<std::uint64_t>();
  atomic_fetch_flip_basic<std::uint8_t>();
}

TEST_F(AtomicFetchSetTest, EnsureFetchOrUsed) {
  atomic_fetch_set_non_std_atomic<std::uint8_t>();
  atomic_fetch_set_non_std_atomic<std::uint16_t>();
@@ -280,6 +346,13 @@ TEST_F(AtomicFetchResetTest, EnsureFetchAndUsed) {
  atomic_fetch_reset_non_std_atomic<std::uint64_t>();
}
TEST_F(AtomicFetchFlipTest, EnsureFetchXorUsed) {
  atomic_fetch_flip_non_std_atomic<std::uint8_t>();
  atomic_fetch_flip_non_std_atomic<std::uint16_t>();
  atomic_fetch_flip_non_std_atomic<std::uint32_t>();
  atomic_fetch_flip_non_std_atomic<std::uint64_t>();
}

TEST_F(AtomicFetchSetTest, FetchSetFallback) {
  auto fetch_set = with_seq_cst{access::atomic_fetch_set_fallback};
@@ -308,6 +381,20 @@ TEST_F(AtomicFetchResetTest, FetchResetFallback) {
  atomic_fetch_reset_non_std_atomic<std::uint64_t>(fetch_reset);
}
TEST_F(AtomicFetchFlipTest, FetchFlipFallback) {
  auto fetch_flip = with_seq_cst{access::atomic_fetch_flip_fallback};

  atomic_fetch_flip_basic<std::uint16_t>(fetch_flip);
  atomic_fetch_flip_basic<std::uint32_t>(fetch_flip);
  atomic_fetch_flip_basic<std::uint64_t>(fetch_flip);
  atomic_fetch_flip_basic<std::uint8_t>(fetch_flip);

  atomic_fetch_flip_non_std_atomic<std::uint8_t>(fetch_flip);
  atomic_fetch_flip_non_std_atomic<std::uint16_t>(fetch_flip);
  atomic_fetch_flip_non_std_atomic<std::uint32_t>(fetch_flip);
  atomic_fetch_flip_non_std_atomic<std::uint64_t>(fetch_flip);
}
TEST_F(AtomicFetchSetTest, FetchSetDefault) {
  auto fetch_set = access::atomic_fetch_set;
@@ -336,4 +423,18 @@ TEST_F(AtomicFetchResetTest, FetchResetDefault) {
  atomic_fetch_reset_non_std_atomic<std::uint64_t>(fetch_reset);
}
TEST_F(AtomicFetchFlipTest, FetchFlipDefault) {
  auto fetch_flip = access::atomic_fetch_flip;

  atomic_fetch_flip_basic<std::uint16_t>(fetch_flip);
  atomic_fetch_flip_basic<std::uint32_t>(fetch_flip);
  atomic_fetch_flip_basic<std::uint64_t>(fetch_flip);
  atomic_fetch_flip_basic<std::uint8_t>(fetch_flip);

  atomic_fetch_flip_non_std_atomic<std::uint8_t>(fetch_flip);
  atomic_fetch_flip_non_std_atomic<std::uint16_t>(fetch_flip);
  atomic_fetch_flip_non_std_atomic<std::uint32_t>(fetch_flip);
  atomic_fetch_flip_non_std_atomic<std::uint64_t>(fetch_flip);
}
} // namespace folly