diff --git a/folly/synchronization/HazptrDomain.h b/folly/synchronization/HazptrDomain.h
index d1b6cac881ebf6f3642e926dfc054b8e32a3effe..f738db885d0c382d846cc0726e649858f96ccd49 100644
--- a/folly/synchronization/HazptrDomain.h
+++ b/folly/synchronization/HazptrDomain.h
@@ -25,7 +25,6 @@
 #include <folly/synchronization/AsymmetricMemoryBarrier.h>
 
 #include <atomic>
-#include <functional>
 #include <unordered_set> // for hash set in bulk_reclaim
 
 ///
@@ -48,6 +47,59 @@ constexpr int hazptr_domain_rcount_threshold() {
  *  A domain manages a set of hazard pointers and a set of retired objects.
  *
  *  Most user code need not specify any domains.
+ *
+ *  Notes on destruction order, tagged objects, locking and deadlock
+ *  avoidance:
+ *  - Tagged objects support reclamation order guarantees. A call to
+ *    cleanup_batch_tag(tag) guarantees that all objects with the
+ *    specified tag are reclaimed before the function returns.
+ *  - Because of this strict ordering guarantee, access to the set
+ *    of tagged objects must be synchronized, and care must be taken
+ *    to avoid deadlock.
+ *  - There are two types of reclamation operations to consider:
+ *   - Type A: A Type A reclamation operation is triggered when the
+ *     number of retired objects reaches a threshold. Reclaimed
+ *     objects may have different
+ *     tags. Hazard pointers are checked and only unprotected objects
+ *     are reclaimed. This type is expected to be expensive but
+ *     infrequent and the cost is amortized over a large number of
+ *     reclaimed objects. This type is needed to guarantee an upper
+ *     bound on unreclaimed reclaimable objects.
+ *   - Type B: A Type B reclamation operation is triggered by a call
+ *     to the function cleanup_batch_tag for a specific tag. All
+ *     objects with the specified tag must be reclaimed
+ *     unconditionally before returning from such a function
+ *     call. Hazard pointers are not checked. This type of reclamation
+ *     operation is expected to be inexpensive and may be invoked more
+ *     frequently than Type A.
+ *  - Tagged retired objects are kept in a single list in the domain
+ *    structure, named tagged_.
+ *  - Both Type A and Type B reclamation operations pop all the
+ *    objects in tagged_ and partition them into two sets of
+ *    reclaimable and unreclaimable objects. The objects in the
+ *    reclaimable set are reclaimed and the objects in the
+ *    unreclaimable set are pushed back onto tagged_.
+ *  - The tagged_ list is locked between popping all objects and
+ *    pushing back unreclaimable objects, in order to guarantee that
+ *    Type B operations do not miss any objects that match the
+ *    specified tag.
+ *  - A Type A operation must not release the lock on the tagged_
+ *    list before reclaiming the reclaimable objects, in order to
+ *    prevent concurrent Type B operations from returning before the
+ *    reclamation of objects with matching tags.
+ *  - A Type B operation can release the lock on tagged_ before
+ *    reclaiming objects because the sets of objects reclaimable by
+ *    concurrent Type B operations are disjoint.
+ *  - The lock on the tagged_ list is re-entrant, to prevent deadlock
+ *    when reclamation in a Type A operation requires a Type B
+ *    reclamation operation to complete.
+ *  - The implementation allows only one pattern of re-entrance: an
+ *    inner Type B operation inside an outer Type A operation.
+ *  - An inner Type B operation must be able to access and modify
+ *    the outer Type A operation's set of reclaimable objects and
+ *    their children in order not to miss objects that match the
+ *    specified tag. Hence, Type A operations use the data members
+ *    unprotected_ and children_ to keep track of these objects
+ *    between reclamation steps and to provide inner Type B
+ *    operations access to them.
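+ *  - An illustrative scenario (a sketch of the intended
+ *    interaction): a Type A operation pops tagged_, keeps it
+ *    locked, and stores the unprotected objects in unprotected_.
+ *    Reclaiming one of these objects may invoke
+ *    cleanup_batch_tag(tag), an inner Type B operation, which
+ *    reenters the tagged_ lock, extracts the objects with a
+ *    matching tag from tagged_, unprotected_, and children_,
+ *    reclaims them unconditionally, and returns. The outer Type A
+ *    operation then continues with the objects remaining in
+ *    unprotected_.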
  */
 template <template <typename> class Atom>
 class hazptr_domain {
@@ -59,13 +111,8 @@ class hazptr_domain {
   static constexpr int kThreshold = detail::hazptr_domain_rcount_threshold();
   static constexpr int kMultiplier = 2;
   static constexpr uint64_t kSyncTimePeriod{2000000000}; // nanoseconds
-  static constexpr uint8_t kLogNumTaggedLists = 6;
-  static constexpr uint16_t kNumTaggedLists = 1 << kLogNumTaggedLists;
-  static constexpr uint16_t kTaggedListIDMask = kNumTaggedLists - 1;
   static constexpr uintptr_t kTagBit = hazptr_obj<Atom>::kTagBit;
 
-  static_assert(kNumTaggedLists <= 1024, "Too many tagged lists.");
-
   Atom<hazptr_rec<Atom>*> hazptrs_{nullptr};
   Atom<hazptr_obj<Atom>*> retired_{nullptr};
   Atom<uint64_t> sync_time_{0};
@@ -78,7 +125,9 @@ class hazptr_domain {
   bool shutdown_{false};
 
   RetiredList untagged_;
-  RetiredList tagged_[kNumTaggedLists];
+  RetiredList tagged_;
+  Obj* unprotected_{nullptr}; // List of unprotected objects being reclaimed
+  ObjList children_; // Children of unprotected objects being reclaimed
 
  public:
   /** Constructor */
@@ -89,9 +138,7 @@ class hazptr_domain {
     shutdown_ = true;
     reclaim_all_objects();
     free_hazptr_recs();
-    for (uint16_t i = 0; i < kNumTaggedLists; ++i) {
-      DCHECK(tagged_[i].empty());
-    }
+    DCHECK(tagged_.empty());
   }
 
   hazptr_domain(const hazptr_domain&) = delete;
@@ -125,14 +172,35 @@ class hazptr_domain {
   /** cleanup_batch_tag */
   void cleanup_batch_tag(const hazptr_obj_batch<Atom>* batch) noexcept {
     auto tag = reinterpret_cast<uintptr_t>(batch) + kTagBit;
-    RetiredList& rlist = tagged_[hash_tag(tag)];
+    auto obj = tagged_.pop_all(RetiredList::kAlsoLock);
     ObjList match, nomatch;
-    auto obj = rlist.pop_all(RetiredList::kAlsoLock);
-    list_match_condition(
-        obj, match, nomatch, [tag](Obj* o) { return o->batch_tag() == tag; });
-    rlist.push_unlock(nomatch);
+    list_match_tag(tag, obj, match, nomatch);
+    if (unprotected_) { // There must be an ongoing do_reclamation
+      ObjList match2, nomatch2;
+      list_match_tag(tag, unprotected_, match2, nomatch2);
+      match.splice(match2);
+      unprotected_ = nomatch2.head();
+    }
+    if (children_.head()) {
+      ObjList match2, nomatch2;
+      list_match_tag(tag, children_.head(), match2, nomatch2);
+      match.splice(match2);
+      children_ = std::move(nomatch2);
+    }
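+    /* Take the count of the pushed-back objects and zero it so that
+       push_unlock does not add it to the list count; if it has
+       reached the threshold, a reclamation pass over tagged_ is
+       checked for explicitly below. */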
+    auto count = nomatch.count();
+    nomatch.set_count(0);
+    tagged_.push_unlock(nomatch);
     obj = match.head();
     reclaim_list_transitive(obj);
+    if (count >= threshold()) {
+      check_threshold_and_reclaim(tagged_, RetiredList::kAlsoLock);
+    }
+  }
+
+  void
+  list_match_tag(uintptr_t tag, Obj* obj, ObjList& match, ObjList& nomatch) {
+    list_match_condition(
+        obj, match, nomatch, [tag](Obj* o) { return o->batch_tag() == tag; });
   }
 
  private:
@@ -188,7 +256,7 @@ class hazptr_domain {
     }
     uintptr_t btag = l.head()->batch_tag();
     bool tagged = ((btag & kTagBit) == kTagBit);
-    RetiredList& rlist = tagged ? tagged_[hash_tag(btag)] : untagged_;
+    RetiredList& rlist = tagged ? tagged_ : untagged_;
     /*** Full fence ***/ asymmetricLightBarrier();
     /* Only tagged lists need to be locked because tagging is used to
      * guarantee the identification of all objects with a specific
@@ -200,11 +268,6 @@ class hazptr_domain {
     check_threshold_and_reclaim(rlist, lock);
   }
 
-  uint16_t hash_tag(uintptr_t tag) {
-    size_t h = std::hash<uintptr_t>{}(tag);
-    return h & kTaggedListIDMask;
-  }
-
   /** threshold */
   int threshold() {
     auto thresh = kThreshold;
@@ -234,14 +297,18 @@ class hazptr_domain {
     list_match_condition(obj, match, nomatch, [&](Obj* o) {
       return hs.count(o->raw_ptr()) > 0;
     });
-    /* Reclaim unmatched objects */
-    hazptr_obj_list<Atom> children;
-    reclaim_list(nomatch.head(), children);
-    match.splice(children);
-    /* Push back matched and children of unmatched objects */
+    /* Reclaim unprotected objects and push back protected objects and
+       children of reclaimed objects */
     if (lock) {
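+      /* Expose the unprotected objects through the unprotected_
+         member (and their children through children_) so that an
+         inner Type B operation can extract any that match its
+         tag. */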
+      unprotected_ = nomatch.head();
+      DCHECK(children_.empty());
+      reclaim_unprotected_safe();
+      match.splice(children_);
       rlist.push_unlock(match);
     } else {
+      ObjList children;
+      reclaim_unprotected_unsafe(nomatch.head(), children);
+      match.splice(children);
       rlist.push(match, false);
     }
   }
@@ -279,8 +346,26 @@ class hazptr_domain {
     }
   }
 
-  /** reclaim_list */
-  void reclaim_list(Obj* head, ObjList& children) {
+  /** reclaim_unprotected_safe */
+  void reclaim_unprotected_safe() {
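+    /* unprotected_ is re-read on every iteration because a
+       reclamation step may reenter cleanup_batch_tag, which can
+       remove objects from unprotected_ and children_. */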
+    while (unprotected_) {
+      auto obj = unprotected_;
+      unprotected_ = obj->next();
+      (*(obj->reclaim()))(obj, children_);
+    }
+  }
+
+  /** reclaim_unprotected_unsafe */
+  void reclaim_unprotected_unsafe(Obj* obj, ObjList& children) {
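+    /* No lock is held here; this path serves the untagged list,
+       which Type B operations do not search. */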
+    while (obj) {
+      auto next = obj->next();
+      (*(obj->reclaim()))(obj, children);
+      obj = next;
+    }
+  }
+
+  /** reclaim_unconditional */
+  void reclaim_unconditional(Obj* head, ObjList& children) {
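+    /* Hazard pointers are not checked; children of reclaimed
+       objects are collected in children. */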
     while (head) {
       auto next = head->next();
       (*(head->reclaim()))(head, children);
@@ -318,7 +403,7 @@ class hazptr_domain {
   void reclaim_list_transitive(Obj* head) {
     while (head) {
       ObjList children;
-      reclaim_list(head, children);
+      reclaim_unconditional(head, children);
       head = children.head();
     }
   }
diff --git a/folly/synchronization/HazptrObj.h b/folly/synchronization/HazptrObj.h
index f703a96f8acd0d53e361829a38462e3327cd1740..385498c73cc563512b6cfd649b654dc201aaefce 100644
--- a/folly/synchronization/HazptrObj.h
+++ b/folly/synchronization/HazptrObj.h
@@ -181,6 +181,15 @@ class hazptr_obj {
     }
   }
 
+  void push_obj(hazptr_domain<Atom>& domain) {
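+    /* Route the object to its batch, if it has one; otherwise push
+       it directly to the domain's retired list. Shared by the
+       retire paths of hazptr_obj_base and hazptr_obj_base_linked. */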
+    auto b = batch();
+    if (b) {
+      b->push_obj(this, domain);
+    } else {
+      push_to_retired(domain);
+    }
+  }
+
   void push_to_retired(hazptr_domain<Atom>& domain) {
 #if FOLLY_HAZPTR_THR_LOCAL
     if (&domain == &default_hazptr_domain<Atom>() && !domain.shutdown_) {
@@ -232,6 +241,10 @@ class hazptr_obj_list {
     return count_;
   }
 
+  void set_count(int val) {
+    count_ = val;
+  }
+
   bool empty() const noexcept {
     return head() == nullptr;
   }
@@ -307,10 +320,7 @@ class hazptr_obj_batch {
   }
 
  private:
-  template <typename, template <typename> class, typename>
-  friend class hazptr_obj_base;
-  template <typename, template <typename> class, typename>
-  friend class hazptr_obj_base_linked;
+  friend class hazptr_obj<Atom>;
 
   int count() const noexcept {
     return count_.load(std::memory_order_acquire);
@@ -411,7 +421,10 @@ class hazptr_obj_retired_list {
   void push_unlock(hazptr_obj_list<Atom>& l) noexcept {
     List ll(l.head(), l.tail());
     retired_.push_unlock(ll);
-    add_count(l.count());
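+    /* Skip add_count when the count is zero, e.g., when
+       cleanup_batch_tag has already taken the count of the objects
+       it pushes back. */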
+    auto count = l.count();
+    if (count) {
+      add_count(count);
+    }
   }
 
   int count() const noexcept {
@@ -495,12 +508,7 @@ class hazptr_obj_base : public hazptr_obj<Atom>, public hazptr_deleter<T, D> {
       hazptr_domain<Atom>& domain = default_hazptr_domain<Atom>()) {
     pre_retire(std::move(deleter));
     set_reclaim();
-    auto batch = this->batch();
-    if (batch) {
-      batch->push_obj(this, domain);
-    } else {
-      this->push_to_retired(domain); // defined in hazptr_obj
-    }
+    this->push_obj(domain); // defined in hazptr_obj
   }
 
   void retire(hazptr_domain<Atom>& domain) {
diff --git a/folly/synchronization/HazptrObjLinked.h b/folly/synchronization/HazptrObjLinked.h
index d134da8ad1d9db0e6efccbfa182098fac9456130..af0290f8dcfa81d6106b73a63d5100f7fdf9c85e 100644
--- a/folly/synchronization/HazptrObjLinked.h
+++ b/folly/synchronization/HazptrObjLinked.h
@@ -240,14 +240,7 @@ class hazptr_obj_base_linked : public hazptr_obj_linked<Atom>,
     this->pre_retire_check(); // defined in hazptr_obj
     set_reclaim();
     auto& domain = default_hazptr_domain<Atom>();
-    auto btag = this->batch_tag();
-    if (btag == 0u) {
-      this->push_to_retired(domain); // defined in hazptr_obj
-    } else {
-      btag -= btag & 1u;
-      auto batch = reinterpret_cast<hazptr_obj_batch<Atom>*>(btag);
-      batch->push_obj(this, domain);
-    }
+    this->push_obj(domain); // defined in hazptr_obj
   }
 
   /* unlink: Retire object if last link is released. */
diff --git a/folly/synchronization/detail/HazptrUtils.h b/folly/synchronization/detail/HazptrUtils.h
index e4d9362c72da98f3c494f1a861d9e7ebed517ef0..0f2acb60ffb3f0b3af4b9cdad5ce21c740507ef8 100644
--- a/folly/synchronization/detail/HazptrUtils.h
+++ b/folly/synchronization/detail/HazptrUtils.h
@@ -21,6 +21,7 @@
 #include <glog/logging.h>
 
 #include <atomic>
+#include <thread>
 
 /// Linked list class templates used in the hazard pointer library:
 /// - linked_list: Sequential linked list that uses a pre-existing
@@ -209,10 +210,14 @@ class shared_head_tail_list {
  *  following are valid combinations:
  *  - push(kMayBeLocked), pop_all(kAlsoLock), push_unlock
  *  - push(kMayNotBeLocked), pop_all(kDontLock)
+ *
+ *  Locking is reentrant (one extra level at most) to prevent
+ *  self-deadlock when an operation on a locked list triggers
+ *  another operation on the same list.
  */
 template <typename Node, template <typename> class Atom = std::atomic>
 class shared_head_only_list {
   Atom<uintptr_t> head_{0}; // lowest bit is a lock for pop all
+  Atom<std::thread::id> owner_{std::thread::id()};
+  int reentrance_{0};
 
   static constexpr uintptr_t kLockBit = 1u;
   static constexpr uintptr_t kUnlocked = 0u;
@@ -252,17 +257,33 @@ class shared_head_only_list {
   }
 
   void push_unlock(linked_list<Node>& l) noexcept {
-    auto oldval = head();
-    DCHECK_EQ(oldval & kLockBit, kLockBit); // Should be already locked
-    auto ptrval = oldval - kLockBit;
-    auto ptr = reinterpret_cast<Node*>(ptrval);
-    auto t = l.tail();
-    if (t) {
-      t->set_next(ptr); // Node must support set_next
+    DCHECK_EQ(owner(), std::this_thread::get_id());
+    uintptr_t lockbit;
+    if (reentrance_ > 0) {
+      DCHECK_EQ(reentrance_, 1);
+      --reentrance_;
+      lockbit = kLockBit;
+    } else {
+      clear_owner();
+      lockbit = kUnlocked;
+    }
+    DCHECK_EQ(reentrance_, 0);
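+    /* CAS loop rather than a plain store: other threads may push to
+       the list (push with kMayBeLocked) even while it is locked, so
+       head_ may change between the read and the update. */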
+    while (true) {
+      auto oldval = head();
+      DCHECK_EQ(oldval & kLockBit, kLockBit); // Should be already locked
+      auto ptrval = oldval - kLockBit;
+      auto ptr = reinterpret_cast<Node*>(ptrval);
+      auto t = l.tail();
+      if (t) {
+        t->set_next(ptr); // Node must support set_next
+      }
+      auto newval =
+          (t == nullptr) ? ptrval : reinterpret_cast<uintptr_t>(l.head());
+      newval += lockbit;
+      if (cas_head(oldval, newval)) {
+        break;
+      }
     }
-    auto newval =
-        (t == nullptr) ? ptrval : reinterpret_cast<uintptr_t>(l.head());
-    set_head(newval);
   }
 
   bool check_lock() const noexcept {
@@ -278,10 +299,6 @@ class shared_head_only_list {
     return head_.load(std::memory_order_acquire);
   }
 
-  void set_head(uintptr_t val) noexcept {
-    head_.store(val, std::memory_order_release);
-  }
-
   uintptr_t exchange_head() noexcept {
     auto newval = reinterpret_cast<uintptr_t>(nullptr);
     auto oldval = head_.exchange(newval, std::memory_order_acq_rel);
@@ -293,6 +310,19 @@ class shared_head_only_list {
         oldval, newval, std::memory_order_acq_rel, std::memory_order_acquire);
   }
 
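+  /* owner_ records the id of the thread holding the lock so that
+     pop_all_lock can detect reentrant acquisition by the same
+     thread. Relaxed ordering suffices: a thread acts only on
+     observing its own id, which it stored itself. */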
+  std::thread::id owner() {
+    return owner_.load(std::memory_order_relaxed);
+  }
+
+  void set_owner() {
+    DCHECK(owner() == std::thread::id());
+    owner_.store(std::this_thread::get_id(), std::memory_order_relaxed);
+  }
+
+  void clear_owner() {
+    owner_.store(std::thread::id(), std::memory_order_relaxed);
+  }
+
   Node* pop_all_no_lock() noexcept {
     auto oldval = exchange_head();
     DCHECK_EQ(oldval & kLockBit, kUnlocked);
@@ -304,10 +334,18 @@ class shared_head_only_list {
     while (true) {
       auto oldval = head();
       auto lockbit = oldval & kLockBit;
-      if (lockbit == kUnlocked) {
+      std::thread::id tid = std::this_thread::get_id();
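+      /* Acquire the lock if it is free, or reenter if this thread
+         already holds it. */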
+      if (lockbit == kUnlocked || owner() == tid) {
         auto newval = reinterpret_cast<uintptr_t>(nullptr) + kLockBit;
         if (cas_head(oldval, newval)) {
-          return reinterpret_cast<Node*>(oldval);
+          DCHECK_EQ(reentrance_, 0);
+          if (lockbit == kUnlocked) {
+            set_owner();
+          } else {
+            ++reentrance_;
+          }
+          auto ptrval = oldval - lockbit;
+          return reinterpret_cast<Node*>(ptrval);
         }
       }
       s.sleep();