Tweaks to folly::call_once and folly::once_flag

Summary: [Folly] Tweaks to `folly::call_once` and `folly::once_flag`. In particular:
* Move the template class out of `detail`.
* Add parameterization by the atomic type.
* Expand the comments.

Reviewed By: Orvid
Differential Revision: D6637250
fbshipit-source-id: 3806580ca0badf8464f637750c4873b2aba9f916

Tweaks to folly::call_once and folly::once_flag
Summary: [Folly] Tweaks to `folly::call_once` and `folly::once_flag`. In particular:
* Move the template class out of `detail`.
* Add parameterization by the atomic type.
* Expand the comments.

Reviewed By: Orvid
Differential Revision: D6637250
fbshipit-source-id: 3806580ca0badf8464f637750c4873b2aba9f916
651b5bd9 · Yedidya Feldblum · Facebook Github Bot · e14ef532 · 651b5bd9 · 651b5bd9
Commit 651b5bd9 authored Dec 29, 2017 by Yedidya Feldblum Committed by Facebook Github Bot Dec 29, 2017
4 changed files
--- a/folly/fibers/CallOnce.h
+++ b/folly/fibers/CallOnce.h
@@ -20,6 +20,6 @@
 namespace folly {
 namespace fibers {

-using once_flag = ::folly::detail::once_flag<folly::fibers::TimedMutex>;
+using once_flag = basic_once_flag<TimedMutex>;
 }
 } // namespace folly
--- a/folly/synchronization/CallOnce.h
+++ b/folly/synchronization/CallOnce.h
 /*
- * Copyright 2017 Facebook, Inc.
+ * Copyright 2016-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,20 +14,6 @@
 * limitations under the License.
 */

-/*
- * Drop-in replacement for std::call_once() with a fast path, which the GCC
- * implementation lacks.  The tradeoff is a slightly larger `once_flag' struct
- * (8 bytes vs 4 bytes with GCC on Linux/x64).
- *
- * $ call_once_test --benchmark --bm_min_iters=100000000 --threads=16
- * ============================================================================
- * folly/test/CallOnceTest.cpp                     relative  time/iter  iters/s
- * ============================================================================
- * StdCallOnceBench                                             3.54ns  282.82M
- * FollyCallOnceBench                                         698.48ps    1.43G
- * ============================================================================
- */
-
 #pragma once

 #include <atomic>
@@ -37,65 +23,89 @@
 #include <folly/Likely.h>
 #include <folly/Portability.h>
 #include <folly/SharedMutex.h>
+#include <folly/functional/Invoke.h>

 namespace folly {
-namespace detail {
-template <typename Mutex>
-class once_flag;

-// Implementation detail: out-of-line slow path
-template <class Mutex, class Callable, class... Args>
-void FOLLY_NOINLINE call_once_impl_no_inline(
-    detail::once_flag<Mutex>& flag,
-    Callable&& f,
-    Args&&... args) {
-  std::lock_guard<Mutex> lg(flag.mutex_);
-  if (flag.called_) {
-    return;
-  }
-
-  std::forward<Callable>(f)(std::forward<Args>(args)...);
+template <typename Mutex, template <typename> class Atom = std::atomic>
+class basic_once_flag;

-  flag.called_.store(true, std::memory_order_release);
+//  call_once
+//
+//  Drop-in replacement for std::call_once.
+//
+//  The libstdc++ implementation has two flaws:
+//  * it lacks a fast path, and
+//  * it deadlocks (in explicit violation of the standard) when invoked twice
+//    with a given flag, and the callable passed to the first invocation throws.
+//
+//  This implementation corrects both flaws.
+//
+//  The tradeoff is a slightly larger once_flag struct at 8 bytes, vs 4 bytes
+//  with libstdc++ on Linux/x64.
+//
+//  Does not work with std::once_flag.
+//
+//  mimic: std::call_once
+template <
+    typename Mutex,
+    template <typename> class Atom,
+    typename F,
+    typename... Args>
+FOLLY_ALWAYS_INLINE void
+call_once(basic_once_flag<Mutex, Atom>& flag, F&& f, Args&&... args) {
+  flag.call_once(std::forward<F>(f), std::forward<Args>(args)...);
 }
-} // namespace detail

-using once_flag = detail::once_flag<folly::SharedMutex>;
-
-template <class Mutex, class Callable, class... Args>
-void FOLLY_ALWAYS_INLINE
-call_once(detail::once_flag<Mutex>& flag, Callable&& f, Args&&... args) {
-  if (LIKELY(flag.called_.load(std::memory_order_acquire))) {
-    return;
-  }
-  call_once_impl_no_inline(
-      flag, std::forward<Callable>(f), std::forward<Args>(args)...);
-}
+//  basic_once_flag
+//
+//  The flag template to be used with call_once. Parameterizable by the mutex
+//  type and atomic template. The mutex type is required to mimic std::mutex and
+//  the atomic type is required to mimic std::atomic.
+template <typename Mutex, template <typename> class Atom>
+class basic_once_flag {
+ public:
+  constexpr basic_once_flag() noexcept = default;
+  basic_once_flag(const basic_once_flag&) = delete;
+  basic_once_flag& operator=(const basic_once_flag&) = delete;

-namespace detail {
+ private:
+  template <
+      typename Mutex_,
+      template <typename> class Atom_,
+      typename F,
+      typename... Args>
+  friend void call_once(basic_once_flag<Mutex_, Atom_>&, F&&, Args&&...);

-template <typename Mutex>
-class once_flag {
- public:
-  constexpr once_flag() noexcept = default;
-  once_flag(const once_flag&) = delete;
-  once_flag& operator=(const once_flag&) = delete;
+  template <typename F, typename... Args>
+  FOLLY_ALWAYS_INLINE void call_once(F&& f, Args&&... args) {
+    if (LIKELY(called_.load(std::memory_order_acquire))) {
+      return;
+    }
+    call_once_slow(std::forward<F>(f), std::forward<Args>(args)...);
+  }

-  template <typename Mutex_, typename Callable, class... Args>
-  friend void ::folly::call_once(
-      detail::once_flag<Mutex_>& flag,
-      Callable&& f,
-      Args&&... args);
-  template <typename Mutex_, typename Callable, class... Args>
-  friend void call_once_impl_no_inline(
-      detail::once_flag<Mutex_>& flag,
-      Callable&& f,
-      Args&&... args);
+  template <typename F, typename... Args>
+  FOLLY_NOINLINE void call_once_slow(F&& f, Args&&... args) {
+    std::lock_guard<Mutex> lock(mutex_);
+    if (called_.load(std::memory_order_relaxed)) {
+      return;
+    }
+    invoke(std::forward<F>(f), std::forward<Args>(args)...);
+    called_.store(true, std::memory_order_release);
+  }

- private:
-  std::atomic<bool> called_{false};
+  Atom<bool> called_{false};
  Mutex mutex_;
 };
-} // namespace detail
+
+//  once_flag
+//
+//  The flag type to be used with call_once. An instance of basic_once_flag.
+//
+//  Does not work with std::call_once.
+//
+//  mimic: std::once_flag
+using once_flag = basic_once_flag<SharedMutex>;

 } // namespace folly
--- a/folly/synchronization/test/CallOnceBenchmark.cpp
+++ b/folly/synchronization/test/CallOnceBenchmark.cpp
 /*
- * Copyright 2017 Facebook, Inc.
+ * Copyright 2016-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -27,11 +27,11 @@
 DEFINE_int32(threads, 16, "benchmark concurrency");

 template <typename CallOnceFunc>
-void bm_impl(CallOnceFunc&& fn, int64_t iters) {
+void bm_impl(CallOnceFunc&& fn, size_t iters) {
  std::deque<std::thread> threads;
-  for (int i = 0; i < FLAGS_threads; ++i) {
+  for (size_t i = 0u; i < size_t(FLAGS_threads); ++i) {
    threads.emplace_back([&fn, iters] {
-      for (int64_t j = 0; j < iters; ++j) {
+      for (size_t j = 0u; j < iters; ++j) {
        fn();
      }
    });
@@ -55,6 +55,16 @@ BENCHMARK(FollyCallOnceBench, iters) {
  CHECK_EQ(1, out);
 }

+/*
+$ call_once_benchmark --bm_min_iters=100000000 --threads=16
+============================================================================
+folly/synchronization/test/CallOnceBenchmark.cpprelative  time/iter  iters/s
+============================================================================
+StdCallOnceBench                                             2.40ns  416.78M
+FollyCallOnceBench                                         651.94ps    1.53G
+============================================================================
+*/
+
 int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  folly::runBenchmarks();

--- a/folly/synchronization/test/CallOnceTest.cpp
+++ b/folly/synchronization/test/CallOnceTest.cpp
 /*
- * Copyright 2017 Facebook, Inc.
+ * Copyright 2016-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,20 +18,17 @@
 #include <mutex>
 #include <thread>

-#include <folly/portability/GFlags.h>
 #include <folly/portability/GTest.h>
 #include <folly/synchronization/CallOnce.h>

-#include <glog/logging.h>
-
-DEFINE_int32(threads, 16, "benchmark concurrency");
+static size_t const kNumThreads = 16;

 template <typename CallOnceFunc>
-void bm_impl(CallOnceFunc&& fn, int64_t iters) {
+void bm_impl(CallOnceFunc&& fn, size_t iters) {
  std::deque<std::thread> threads;
-  for (int i = 0; i < FLAGS_threads; ++i) {
+  for (size_t i = 0u; i < kNumThreads; ++i) {
    threads.emplace_back([&fn, iters] {
-      for (int64_t j = 0; j < iters; ++j) {
+      for (size_t j = 0u; j < iters; ++j) {
        fn();
      }
    });