Commit fa32adfe, authored by Dan Melnic, committed by Facebook Github Bot

Faster thread local iteration using the ThreadEntryNode

Summary: Faster thread local iteration using the ThreadEntryNode

Reviewed By: djwatson

Differential Revision: D8551597

fbshipit-source-id: 9118852c0a823851a95b63fe807bfc2679112beb
parent 0a1b8f18
......@@ -263,24 +263,26 @@ class ThreadLocalPtr {
class Iterator {
friend class Accessor;
const Accessor* accessor_;
threadlocal_detail::ThreadEntry* e_;
threadlocal_detail::ThreadEntryNode* e_;
// Moves the iterator one node forward, then calls incrementToValid() to skip
// past positions that valid() rejects.
// NOTE(review): this text is a diff with its +/- markers stripped, so the two
// assignments below appear to be the removed line (direct `next` field
// access) followed by its replacement (`getNext()`); confirm that only the
// second one exists in the resulting file.
void increment() {
e_ = e_->next;
e_ = e_->getNext();
incrementToValid();
}
// Moves the iterator one node backward, then calls decrementToValid() to skip
// past positions that valid() rejects.
// NOTE(review): this text is a diff with its +/- markers stripped, so the two
// assignments below appear to be the removed line (direct `prev` field
// access) followed by its replacement (`getPrev()`); confirm that only the
// second one exists in the resulting file.
void decrement() {
e_ = e_->prev;
e_ = e_->getPrev();
decrementToValid();
}
// Returns a const reference to the T held in the slot indexed by the
// accessor's id_. The slot's `ptr` is cast to T* and dereferenced; no null
// check is performed here.
// NOTE(review): two return statements are shown because the diff markers
// were stripped. The first reads `elements` directly from e_ (apparently the
// removed form); the second goes through e_->getThreadEntry() (apparently
// the replacement).
const T& dereference() const {
return *static_cast<T*>(e_->elements[accessor_->id_].ptr);
return *static_cast<T*>(
e_->getThreadEntry()->elements[accessor_->id_].ptr);
}
// Mutable overload: returns a reference to the T held in the slot indexed by
// the accessor's id_. The slot's `ptr` is cast to T* and dereferenced; no
// null check is performed here.
// NOTE(review): two return statements are shown because the diff markers
// were stripped. The first reads `elements` directly from e_ (apparently the
// removed form); the second goes through e_->getThreadEntry() (apparently
// the replacement).
T& dereference() {
return *static_cast<T*>(e_->elements[accessor_->id_].ptr);
return *static_cast<T*>(
e_->getThreadEntry()->elements[accessor_->id_].ptr);
}
bool equal(const Iterator& other) const {
......@@ -290,21 +292,26 @@ class ThreadLocalPtr {
// Constructs an iterator for `accessor`, positioned at the head of the list
// reachable through accessor_->meta_.
// NOTE(review): two initialisers for e_ are shown because the diff markers
// were stripped. The first takes the address of meta_.head_ itself
// (apparently the removed form); the second takes the address of the `node`
// in head_'s elements slot for accessor_->id_ (apparently the replacement).
explicit Iterator(const Accessor* accessor)
: accessor_(accessor),
e_(&accessor_->meta_.head_) {
}
e_(&accessor_->meta_.head_.elements[accessor_->id_].node) {}
// Reports whether the current position holds a non-null pointer for this
// accessor's id.
// We only need to check the ptr, since it can be set to nullptr even if the
// entry is part of the list.
// NOTE(review): two return statements are shown because the diff markers
// were stripped. The first also checks that `elements` is non-null and that
// id_ is below `elementsCapacity` (apparently the removed form); the second
// reads the slot through getThreadEntry() without those two checks
// (apparently the replacement).
bool valid() const {
return (e_->elements &&
accessor_->id_ < e_->elementsCapacity &&
e_->elements[accessor_->id_].ptr);
return (e_->getThreadEntry()->elements[accessor_->id_].ptr);
}
// Walks e_ forward until it either reaches the head position again or lands
// on a position for which valid() is true. Does nothing if e_ already
// satisfies one of those conditions.
// NOTE(review): two loops are shown because the diff markers were stripped.
// The first compares against &meta_.head_ and steps with `next` (apparently
// the removed form); the second compares against the head's per-id `node`
// and steps with getNext() (apparently the replacement).
void incrementToValid() {
for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->next) { }
for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
!valid();
e_ = e_->getNext()) {
}
}
// Walks e_ backward until it either reaches the head position again or lands
// on a position for which valid() is true. Does nothing if e_ already
// satisfies one of those conditions.
// NOTE(review): two loops are shown because the diff markers were stripped.
// The first compares against &meta_.head_ and steps with `prev` (apparently
// the removed form); the second compares against the head's per-id `node`
// and steps with getPrev() (apparently the replacement).
void decrementToValid() {
for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->prev) { }
for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
!valid();
e_ = e_->getPrev()) {
}
}
public:
......
......@@ -34,10 +34,6 @@ void ThreadEntryNode::initIfZero(bool locked) {
}
}
// Returns the address of the `node` stored in the `next` entry's elements
// slot for this node's id.
// NOTE(review): the enclosing hunk header shrinks from 10 lines to 6, which
// suggests this out-of-line definition is being removed by the commit; an
// identical FOLLY_ALWAYS_INLINE definition appears in a later hunk.
ThreadEntryNode* ThreadEntryNode::getNext() {
return &next->elements[id].node;
}
void ThreadEntryNode::push_back(ThreadEntry* head) {
// get the head prev and next nodes
ThreadEntryNode* hnode = &head->elements[id].node;
......
......@@ -90,15 +90,21 @@ struct ThreadEntryNode {
}
// Returns true if the list this node is part of is empty, which is detected
// by `next` being equal to `parent`.
// NOTE(review): two signature lines are shown because the diff markers were
// stripped; the plain one appears to be the removed form and the
// FOLLY_ALWAYS_INLINE one its replacement.
bool empty() const {
FOLLY_ALWAYS_INLINE bool empty() const {
return (next == parent);
}
// Returns true when `prev` is null.
// NOTE(review): presumably this identifies a node that has not been
// initialised or linked yet - confirm against initIfZero().
// Two signature lines are shown because the diff markers were stripped; the
// plain one appears to be the removed form and the FOLLY_ALWAYS_INLINE one
// its replacement.
bool zero() const {
FOLLY_ALWAYS_INLINE bool zero() const {
return (!prev);
}
// NOTE(review): the first getNext() declaration below appears to be the
// removed (non-inline) form; the FOLLY_ALWAYS_INLINE getPrev()/getNext()
// declarations further down appear to be what the commit adds.
ThreadEntryNode* getNext();
// Returns the ThreadEntry stored in this node's `parent` member.
FOLLY_ALWAYS_INLINE ThreadEntry* getThreadEntry() {
return parent;
}
// Neighbouring nodes for the same id; both are defined out of class in a
// later hunk of this diff.
FOLLY_ALWAYS_INLINE ThreadEntryNode* getPrev();
FOLLY_ALWAYS_INLINE ThreadEntryNode* getNext();
void push_back(ThreadEntry* head);
......@@ -217,6 +223,14 @@ struct ThreadEntryList {
struct PthreadKeyUnregisterTester;
// Returns the address of the node stored for this node's id inside the
// entry referenced by `prev`.
FOLLY_ALWAYS_INLINE ThreadEntryNode* ThreadEntryNode::getPrev() {
  auto& prevSlot = prev->elements[id];
  return &prevSlot.node;
}
// Returns the address of the node stored for this node's id inside the
// entry referenced by `next`.
FOLLY_ALWAYS_INLINE ThreadEntryNode* ThreadEntryNode::getNext() {
  auto& nextSlot = next->elements[id];
  return &nextSlot.node;
}
/**
* We want to disable onThreadExit call at the end of shutdown, we don't care
* about leaking memory at that point.
......
......@@ -308,14 +308,23 @@ class SimpleThreadCachedInt {
};
// Spawns kNumThreads threads that add to SimpleThreadCachedInt counters and
// then spin until `run` is cleared; each thread bumps totalAtomic once it has
// finished its additions.
// NOTE(review): this text is a diff with its +/- markers stripped, so several
// declarations and statements below appear twice - once in the removed form
// and once in the replacement form.
TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
// Apparently removed: 10 threads sharing one counter.
const int kNumThreads = 10;
SimpleThreadCachedInt stci;
// Apparently added: 256 threads and an array of kNumThreads + 1 counters.
const int kNumThreads = 256;
SimpleThreadCachedInt stci[kNumThreads + 1];
std::atomic<bool> run(true);
std::atomic<int> totalAtomic(0);
// NOTE(review): the replacement declaration below drops the (0) initialiser.
// Before C++20 a default-constructed std::atomic<int> with automatic storage
// holds an indeterminate value, so the later fetch_add(1) and the comparison
// of load() against kNumThreads start from an unspecified count. Prefer
// `std::atomic<int> totalAtomic{0};`. The lone ';' on the line after it is an
// empty statement and can be dropped.
std::atomic<int> totalAtomic;
;
std::vector<std::thread> threads;
// thread i will increment all the thread locals
// in the range 0..i
for (int i = 0; i < kNumThreads; ++i) {
// Apparently removed: capture-everything-by-reference lambda that adds 1 to
// the single counter.
threads.push_back(std::thread([&]() {
stci.add(1);
// Apparently added: explicit capture list; the shared objects are taken by
// reference while the loop index is copied.
threads.push_back(std::thread([i, // i needs to be captured by value
&stci,
&run,
&totalAtomic]() {
// Adds 1 to counters 0 through i inclusive.
for (int j = 0; j <= i; j++) {
stci[j].add(1);
}
// Signals that this thread has finished its additions.
totalAtomic.fetch_add(1);
// Keeps the thread alive, polling every 100 microseconds, until `run` is
// cleared.
while (run.load()) {
usleep(100);
......@@ -323,7 +332,9 @@ TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
}));
}
while (totalAtomic.load() != kNumThreads) { usleep(100); }
EXPECT_EQ(kNumThreads, stci.read());
for (int i = 0; i <= kNumThreads; i++) {
EXPECT_EQ(kNumThreads - i, stci[i].read());
}
run.store(false);
for (auto& t : threads) {
t.join();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment