Commit 379e39cc authored by Yiding Jia's avatar Yiding Jia Committed by Facebook GitHub Bot

Reapply with more fixes: "[folly] Fix concurrency issues in ConcurrentSkipList."

Summary:
Original diff D29248955 (https://github.com/facebook/folly/commit/6f4811eff3b7472347cc34c0ac9876ddd96287fc) had a bug that was causing a memory leak and was reverted.

This diff reapplies it but with an extra fix.

Reviewed By: yfeldblum

Differential Revision: D30082935

fbshipit-source-id: 0f119189fe631fc363dffe5c515a8bfa9a054cf6
parent 21f9cf7d
......@@ -97,7 +97,7 @@ class SkipListNode {
inline SkipListNode* skip(int layer) const {
DCHECK_LT(layer, height_);
return skip_[layer].load(std::memory_order_consume);
return skip_[layer].load(std::memory_order_acquire);
}
// next valid node as in the linked list
......@@ -155,7 +155,7 @@ class SkipListNode {
}
}
uint16_t getFlags() const { return flags_.load(std::memory_order_consume); }
uint16_t getFlags() const { return flags_.load(std::memory_order_acquire); }
void setFlags(uint16_t flags) {
flags_.store(flags, std::memory_order_release);
}
......@@ -269,43 +269,41 @@ class NodeRecycler<
dirty_.store(true, std::memory_order_relaxed);
}
int addRef() { return refs_.fetch_add(1, std::memory_order_relaxed); }
int addRef() { return refs_.fetch_add(1, std::memory_order_acq_rel); }
int releaseRef() {
// We don't expect to clean the recycler immediately every time it is OK
// to do so. Here, it is possible that multiple accessors all release at
// the same time but nobody would clean the recycler here. If this
// happens, the recycler will usually still get cleaned when
// such a race doesn't happen. The worst case is the recycler will
// eventually get deleted along with the skiplist.
if (LIKELY(!dirty_.load(std::memory_order_relaxed) || refs() > 1)) {
return refs_.fetch_add(-1, std::memory_order_relaxed);
// This if statement is purely an optimization. It's possible that this
// misses an opportunity to delete, but that's OK, we'll try again at
// the next opportunity. It does not harm the thread safety. For this
// reason, we can use relaxed loads to make the decision.
if (!dirty_.load(std::memory_order_relaxed) || refs() > 1) {
return refs_.fetch_add(-1, std::memory_order_acq_rel);
}
std::unique_ptr<std::vector<NodeType*>> newNodes;
int ret;
{
// The order at which we lock, add, swap, is very important for
// correctness.
std::lock_guard<MicroSpinLock> g(lock_);
if (nodes_.get() == nullptr || refs() > 1) {
return refs_.fetch_add(-1, std::memory_order_relaxed);
ret = refs_.fetch_add(-1, std::memory_order_acq_rel);
if (ret == 1) {
// When releasing the last reference, it is safe to remove all the
// current nodes in the recycler, as we already acquired the lock here
// so no more new nodes can be added, even though new accessors may be
// added after this.
newNodes.swap(nodes_);
dirty_.store(false, std::memory_order_relaxed);
}
// once refs_ reaches 1 and there is no other accessor, it is safe to
// remove all the current nodes in the recycler, as we already acquired
// the lock here so no more new nodes can be added, even though new
// accessors may be added after that.
newNodes.swap(nodes_);
dirty_.store(false, std::memory_order_relaxed);
}
// TODO(xliu) should we spawn a thread to do this when there are large
// number of nodes in the recycler?
for (auto& node : *newNodes) {
NodeType::destroy(alloc_, node);
if (newNodes) {
for (auto& node : *newNodes) {
NodeType::destroy(alloc_, node);
}
}
// decrease the ref count at the very end, to minimize the
// chance of other threads acquiring lock_ to clear the deleted
// nodes again.
return refs_.fetch_add(-1, std::memory_order_relaxed);
return ret;
}
NodeAlloc& alloc() { return alloc_; }
......
......@@ -250,7 +250,7 @@ class ConcurrentSkipList {
return foundLayer;
}
int height() const { return head_.load(std::memory_order_consume)->height(); }
int height() const { return head_.load(std::memory_order_acquire)->height(); }
int maxLayer() const { return height() - 1; }
......@@ -401,12 +401,12 @@ class ConcurrentSkipList {
}
const value_type* first() const {
auto node = head_.load(std::memory_order_consume)->skip(0);
auto node = head_.load(std::memory_order_acquire)->skip(0);
return node ? &node->data() : nullptr;
}
const value_type* last() const {
NodeType* pred = head_.load(std::memory_order_consume);
NodeType* pred = head_.load(std::memory_order_acquire);
NodeType* node = nullptr;
for (int layer = maxLayer(); layer >= 0; --layer) {
do {
......@@ -434,7 +434,7 @@ class ConcurrentSkipList {
int* max_layer) const {
*max_layer = maxLayer();
return findInsertionPoint(
head_.load(std::memory_order_consume), *max_layer, data, preds, succs);
head_.load(std::memory_order_acquire), *max_layer, data, preds, succs);
}
// Find node for access. Returns a paired values:
......@@ -450,7 +450,7 @@ class ConcurrentSkipList {
// results, this is slightly faster than findNodeRightDown for better
// locality on the skipping pointers.
std::pair<NodeType*, int> findNodeDownRight(const value_type& data) const {
NodeType* pred = head_.load(std::memory_order_consume);
NodeType* pred = head_.load(std::memory_order_acquire);
int ht = pred->height();
NodeType* node = nullptr;
......@@ -478,7 +478,7 @@ class ConcurrentSkipList {
// find node by first stepping right then stepping down.
// We still keep this for reference purposes.
std::pair<NodeType*, int> findNodeRightDown(const value_type& data) const {
NodeType* pred = head_.load(std::memory_order_consume);
NodeType* pred = head_.load(std::memory_order_acquire);
NodeType* node = nullptr;
auto top = maxLayer();
int found = 0;
......@@ -502,7 +502,7 @@ class ConcurrentSkipList {
}
void growHeight(int height) {
NodeType* oldHead = head_.load(std::memory_order_consume);
NodeType* oldHead = head_.load(std::memory_order_acquire);
if (oldHead->height() >= height) { // someone else already did this
return;
}
......@@ -598,7 +598,7 @@ class ConcurrentSkipList<T, Comp, NodeAlloc, MAX_HEIGHT>::Accessor {
size_type count(const key_type& data) const { return contains(data); }
iterator begin() const {
NodeType* head = sl_->head_.load(std::memory_order_consume);
NodeType* head = sl_->head_.load(std::memory_order_acquire);
return iterator(head->next());
}
iterator end() const { return iterator(nullptr); }
......@@ -814,7 +814,7 @@ class ConcurrentSkipList<T, Comp, NodeAlloc, MAX_HEIGHT>::Skipper {
private:
NodeType* head() const {
return accessor_.skiplist()->head_.load(std::memory_order_consume);
return accessor_.skiplist()->head_.load(std::memory_order_acquire);
}
Accessor accessor_;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment