Commit 02e534fb authored by Yedidya Feldblum, committed by Facebook GitHub Bot

Stop purging arenas in MemoryIdler

Summary:
jemalloc has background threads to purge unused pages, so stop purging
the arenas used by idle threads: those arenas may be shared with active
threads, which would then have to page-fault new pages back in after the
purge.
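
For context, a minimal sketch of how a service could lean on jemalloc's own
background purging instead, assuming jemalloc >= 5.0 is the active allocator.
The helper name enableJemallocBackgroundPurging is hypothetical; mallctlWrite
and usingJEMalloc are the folly helpers this file already includes:

  #include <folly/memory/MallctlHelper.h>
  #include <folly/memory/Malloc.h>

  // Ask jemalloc to spawn its background threads; they purge dirty pages
  // asynchronously, so per-idle-thread purging becomes unnecessary.
  void enableJemallocBackgroundPurging() {
    if (!folly::usingJEMalloc()) {
      return; // a different allocator is linked in; nothing to do
    }
    try {
      // "background_thread" is a writable jemalloc mallctl (jemalloc >= 5.0).
      folly::mallctlWrite("background_thread", true);
    } catch (const std::runtime_error&) {
      // Background threads may be unavailable, e.g. disabled at build time.
    }
  }

The same effect is commonly obtained without code by setting
MALLOC_CONF=background_thread:true in the environment.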

Reviewed By: ot, davidtgoldblatt

Differential Revision: D26793042

fbshipit-source-id: 13617bfb0ace1c5b7aae52dfdc428b19ff99f7bf
parent deb71d74
@@ -27,10 +27,16 @@
 #include <folly/concurrency/CacheLocality.h>
 #include <folly/memory/MallctlHelper.h>
 #include <folly/memory/Malloc.h>
+#include <folly/portability/GFlags.h>
 #include <folly/portability/PThread.h>
 #include <folly/portability/SysMman.h>
 #include <folly/portability/Unistd.h>
 
+DEFINE_bool(
+    folly_memory_idler_purge_arenas,
+    false,
+    "if enabled, folly memory-idler purges jemalloc arenas on thread idle");
+
 namespace folly {
 namespace detail {
@@ -46,34 +52,36 @@ void MemoryIdler::flushLocalMallocCaches() {
     return;
   }
 
-  try {
-    // Not using mallctlCall as this will fail if tcache is disabled.
-    mallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0);
-
-    // By default jemalloc has 4 arenas per cpu, and then assigns each
-    // thread to one of those arenas. This means that in any service
-    // that doesn't perform a lot of context switching, the chances that
-    // another thread will be using the current thread's arena (and hence
-    // doing the appropriate dirty-page purging) are low. Some good
-    // tuned configurations (such as that used by hhvm) use fewer arenas
-    // and then pin threads to avoid contended access. In that case,
-    // purging the arenas is counter-productive. We use the heuristic
-    // that if narenas <= 2 * num_cpus then we shouldn't do anything here,
-    // which detects when the narenas has been reduced from the default
-    unsigned narenas;
-    unsigned arenaForCurrent;
-    size_t mib[3];
-    size_t miblen = 3;
-
-    mallctlRead("opt.narenas", &narenas);
-    mallctlRead("thread.arena", &arenaForCurrent);
-    if (narenas > 2 * CacheLocality::system().numCpus &&
-        mallctlnametomib("arena.0.purge", mib, &miblen) == 0) {
-      mib[1] = static_cast<size_t>(arenaForCurrent);
-      mallctlbymib(mib, miblen, nullptr, nullptr, nullptr, 0);
-    }
-  } catch (const std::runtime_error& ex) {
-    FB_LOG_EVERY_MS(WARNING, 10000) << ex.what();
-  }
+  // Not using mallctlCall as this will fail if tcache is disabled.
+  mallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0);
+
+  if (FLAGS_folly_memory_idler_purge_arenas) {
+    try {
+      // By default jemalloc has 4 arenas per cpu, and then assigns each
+      // thread to one of those arenas. This means that in any service
+      // that doesn't perform a lot of context switching, the chances that
+      // another thread will be using the current thread's arena (and hence
+      // doing the appropriate dirty-page purging) are low. Some good
+      // tuned configurations (such as that used by hhvm) use fewer arenas
+      // and then pin threads to avoid contended access. In that case,
+      // purging the arenas is counter-productive. We use the heuristic
+      // that if narenas <= 2 * num_cpus then we shouldn't do anything here,
+      // which detects when the narenas has been reduced from the default
+      unsigned narenas;
+      unsigned arenaForCurrent;
+      size_t mib[3];
+      size_t miblen = 3;
+
+      mallctlRead("opt.narenas", &narenas);
+      mallctlRead("thread.arena", &arenaForCurrent);
+      if (narenas > 2 * CacheLocality::system().numCpus &&
+          mallctlnametomib("arena.0.purge", mib, &miblen) == 0) {
+        mib[1] = static_cast<size_t>(arenaForCurrent);
+        mallctlbymib(mib, miblen, nullptr, nullptr, nullptr, 0);
+      }
+    } catch (const std::runtime_error& ex) {
+      FB_LOG_EVERY_MS(WARNING, 10000) << ex.what();
+    }
+  }
 }
 
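
Since the new flag defaults to false, this diff turns idle-thread arena
purging off everywhere unless a service opts back in. A hedged usage sketch
follows; ParseCommandLineFlags and SetCommandLineOption are ordinary gflags
API, not part of this diff. Note that even with the flag on, the heuristic
above still skips the purge whenever narenas <= 2 * num_cpus: for example,
jemalloc's default of 4 arenas per cpu on a 16-core host gives narenas = 64 >
32, so the purge runs, while a tuned configuration with narenas = 8 would be
skipped.

  #include <folly/portability/GFlags.h>

  int main(int argc, char** argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);

    // Equivalent to passing --folly_memory_idler_purge_arenas=true on the
    // command line; opts this process back into purge-on-idle.
    gflags::SetCommandLineOption("folly_memory_idler_purge_arenas", "true");

    // ... start thread pools as usual; MemoryIdler will once again purge
    // the idle thread's jemalloc arena ...
    return 0;
  }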