Commit a9d90ea1, authored by Artem Lantsev, committed by Facebook Github Bot

Add memory padding and alignment to prevent false sharing

Summary: This is a follow-up commit for the thread https://github.com/facebook/folly/pull/378

Reviewed By: nbronson

Differential Revision: D4860356

fbshipit-source-id: f10a0d12a593c18b1abf94da5b477c524c04f4be
parent 4e9783b9
......@@ -27,6 +27,8 @@
#include <type_traits>
#include <utility>
#include <folly/detail/CacheLocality.h>
namespace folly {
/*
......@@ -166,11 +168,14 @@ struct ProducerConsumerQueue {
}
private:
char pad0_[detail::CacheLocality::kFalseSharingRange];
const uint32_t size_;
T* const records_;
std::atomic<unsigned int> readIndex_;
std::atomic<unsigned int> writeIndex_;
FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> readIndex_;
FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> writeIndex_;
char pad1_[detail::CacheLocality::kFalseSharingRange - sizeof(writeIndex_)];
};
}
......@@ -79,7 +79,7 @@ struct ThroughputTest {
QueueType queue_;
std::atomic<bool> done_;
int iters_;
const int iters_;
int cpu0_;
int cpu1_;
};
......@@ -176,7 +176,7 @@ struct LatencyTest {
QueueType queue_;
std::atomic<bool> done_;
int time_cost_;
int iters_;
const int iters_;
int cpu0_;
int cpu1_;
Histogram<int> hist_;
......@@ -250,26 +250,53 @@ int main(int argc, char** argv) {
#if 0
/*
Benchmark on Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
Latency histogram:
log(nsec)
min max count
6 7 5124
7 8 4799
8 9 49
9 10 2
10 11 1
11 12 5
12 13 3
13 14 9
14 15 8
Benchmark
$ lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 24
On-line CPU(s) list: 0-23
Thread(s) per core: 1
Core(s) per socket: 1
Socket(s): 24
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 60
Model name: Intel Core Processor (Haswell, no TSX)
Stepping: 1
CPU MHz: 2494.244
BogoMIPS: 4988.48
Hypervisor vendor: KVM
Virtualization type: full
L1d cache: 32K
L1i cache: 32K
L2 cache: 4096K
NUMA node0 CPU(s): 0-23
$ ../buck-out/gen/folly/test/producer_consumer_queue_benchmark
5 6 1 5
6 7 1893 11358
7 8 39671 277697
8 9 34921 279368
9 10 17799 160191
10 11 3685 36850
11 12 1075 11825
12 13 456 5472
13 14 422 5486
14 15 64 896
15 16 7 105
16 17 3 48
17 18 3 51
============================================================================
folly/test/ProducerConsumerQueueBenchmark.cpp relative time/iter iters/s
============================================================================
----------------------------------------------------------------------------
BM_ProducerConsumer(1048574) 7.52ns 132.90M
BM_ProducerConsumerAffinity(1048574) 8.28ns 120.75M
BM_ProducerConsumerLatency(1048574) 10.00s 99.98m
BM_ProducerConsumer(1048574) 5.82ns 171.75M
BM_ProducerConsumerAffinity(1048574) 7.36ns 135.83M
BM_ProducerConsumerLatency(1048574) 1.67min 9.99m
============================================================================
*/
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment