Commit 546113b4 authored by Gisle Dankel's avatar Gisle Dankel Committed by Facebook Github Bot

Jemalloc Huge Page Allocator

Summary:
An allocator that uses a jemalloc arena backed by 2MB huge pages.

This uses the jemalloc extent hooks to let jemalloc hand out memory backed by huge pages.
The huge pages are allocated up front when init(int nr_pages) is called, and the reserved area does not grow afterwards (although extending it to grow in the future should be fairly straightforward).

madvise is used to mark the pages as huge. This is only a hint and does not guarantee success; the only way to tell whether huge pages were actually granted is to look at /proc/<pid>/smaps and the huge page totals in /proc/<pid>/status.
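
A quick way to verify (a minimal standalone sketch, assuming Linux; it sums the per-mapping AnonHugePages counters in smaps, which report how much of each mapping is currently backed by transparent huge pages):

#include <fstream>
#include <iostream>
#include <string>

int main() {
  std::ifstream smaps("/proc/self/smaps");
  std::string line;
  long total_kb = 0;
  while (std::getline(smaps, line)) {
    // Counter lines look like: "AnonHugePages:      2048 kB"
    if (line.rfind("AnonHugePages:", 0) == 0) {
      total_kb += std::stol(line.substr(14)); // skip past "AnonHugePages:"
    }
  }
  std::cout << "AnonHugePages total: " << total_kb << " kB\n";
  return 0;
}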

The HugePageAllocator can be used as a template parameter for STL and folly collections.
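
For example (a minimal sketch using the CxxHugePageAllocator wrapper added in this diff; init() may fail, in which case allocations silently fall back to regular pages):

#include <folly/experimental/JemallocHugePageAllocator.h>
#include <vector>

int main() {
  // Reserve four 2MB huge pages before making any allocations.
  folly::JemallocHugePageAllocator::init(4);

  // Containers parameterized on the allocator draw from the huge page arena.
  std::vector<int, folly::CxxHugePageAllocator<int>> v;
  v.reserve(1000);
  return 0;
}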

Reviewed By: yfeldblum

Differential Revision: D9297627

fbshipit-source-id: 734ca1b19972a8b5d7a12ee60cba1ffa3da50dc2
parent 80ae28a3
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/experimental/JemallocHugePageAllocator.h>
#include <folly/portability/String.h>
#include <glog/logging.h>
#include <sstream>
#include <utility> // std::exchange
#if defined(MADV_HUGEPAGE) && defined(FOLLY_HAVE_LIBJEMALLOC) && !FOLLY_SANITIZE
#include <jemalloc/jemalloc.h>
#if (JEMALLOC_VERSION_MAJOR >= 5)
#define FOLLY_JEMALLOC_HUGE_PAGE_ALLOCATOR_SUPPORTED 1
bool folly::JemallocHugePageAllocator::hugePagesSupported{true};
#endif
#endif // defined(MADV_HUGEPAGE) && defined(FOLLY_HAVE_LIBJEMALLOC) && !FOLLY_SANITIZE
#ifndef FOLLY_JEMALLOC_HUGE_PAGE_ALLOCATOR_SUPPORTED
// Stub definitions for when jemalloc.h is not included, the jemalloc version
// is too old, or the system does not support the MADV_HUGEPAGE madvise flag
#undef MALLOCX_ARENA
#undef MALLOCX_TCACHE_NONE
#undef MADV_HUGEPAGE
#define MALLOCX_ARENA(x) 0
#define MALLOCX_TCACHE_NONE 0
#define MADV_HUGEPAGE 0
typedef struct extent_hooks_s extent_hooks_t;
typedef void*(extent_alloc_t)(
    extent_hooks_t*,
    void*,
    size_t,
    size_t,
    bool*,
    bool*,
    unsigned);
struct extent_hooks_s {
  extent_alloc_t* alloc;
};
bool folly::JemallocHugePageAllocator::hugePagesSupported{false};
#endif // FOLLY_JEMALLOC_HUGE_PAGE_ALLOCATOR_SUPPORTED
namespace folly {
namespace {
static void print_error(int err, const char* msg) {
  int cur_errno = std::exchange(errno, err);
  PLOG(ERROR) << msg;
  errno = cur_errno;
}
class HugePageArena {
 public:
  int init(int nr_pages);

  void* reserve(size_t size, size_t alignment);

  bool addressInArena(void* address) {
    uintptr_t addr = reinterpret_cast<uintptr_t>(address);
    return addr >= start_ && addr < end_;
  }

  size_t freeSpace() {
    return end_ - freePtr_;
  }

 private:
  static void* allocHook(
      extent_hooks_t* extent,
      void* new_addr,
      size_t size,
      size_t alignment,
      bool* zero,
      bool* commit,
      unsigned arena_ind);

  uintptr_t start_{0};
  uintptr_t end_{0};
  uintptr_t freePtr_{0};
  extent_alloc_t* originalAlloc_{nullptr};
  extent_hooks_t extentHooks_;
};
constexpr int kHugePageSize = 2 * 1024 * 1024;
// Singleton arena instance
static HugePageArena arena;
template <typename T, typename U>
static inline T align_up(T val, U alignment) {
  DCHECK((alignment & (alignment - 1)) == 0);
  return (val + alignment - 1) & ~(alignment - 1);
}
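// For example, align_up(0x7f0000001000, kHugePageSize) == 0x7f0000200000:
// values are rounded up to the next multiple of the power-of-two alignment,
// and already-aligned values are returned unchanged.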
// mmap enough memory to hold the aligned huge pages, then use madvise
// to get huge pages. Note that this is only a hint and is not guaranteed
// to be honoured. Check /proc/<pid>/smaps to verify!
static uintptr_t map_pages(size_t nr_pages) {
  // Initial mmapped area is large enough to contain the aligned huge pages
  size_t alloc_size = nr_pages * kHugePageSize;
  void* p = mmap(
      nullptr,
      alloc_size + kHugePageSize,
      PROT_READ | PROT_WRITE,
      MAP_PRIVATE | MAP_ANONYMOUS,
      -1,
      0);

  if (p == MAP_FAILED) {
    return 0;
  }

  // Aligned start address
  uintptr_t first_page = align_up((uintptr_t)p, kHugePageSize);

  // Unmap left-over 4k pages
  munmap(p, first_page - (uintptr_t)p);
  munmap(
      (void*)(first_page + alloc_size),
      kHugePageSize - (first_page - (uintptr_t)p));
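  // E.g. if mmap returns 0x7f0000001000, first_page is 0x7f0000200000: the
  // 0x1ff000-byte head and the 0x1000-byte tail are returned to the kernel,
  // leaving exactly nr_pages aligned huge pages mapped.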
  // Tell the kernel to please give us huge pages for this range
  madvise((void*)first_page, kHugePageSize * nr_pages, MADV_HUGEPAGE);
  LOG(INFO) << nr_pages << " huge pages at " << (void*)first_page;
  return first_page;
}
void* HugePageArena::allocHook(
    extent_hooks_t* extent,
    void* new_addr,
    size_t size,
    size_t alignment,
    bool* zero,
    bool* commit,
    unsigned arena_ind) {
  VLOG(1) << "Extent request of size " << size;
  DCHECK((size & (size - 1)) == 0);
  void* res = nullptr;
  if (new_addr == nullptr) {
    res = arena.reserve(size, alignment);
  }
  if (res == nullptr) {
    LOG_IF(WARNING, new_addr != nullptr) << "Explicit address not supported";
    res = arena.originalAlloc_(
        extent, new_addr, size, alignment, zero, commit, arena_ind);
  } else {
    if (*zero) {
      bzero(res, size);
    }
  }
  return res;
}
int HugePageArena::init(int nr_pages) {
  DCHECK(start_ == 0);
  DCHECK(usingJEMalloc());

  // Allocate one extra page for jemalloc's internal use
  nr_pages++;

  unsigned arena_index;
  size_t len = sizeof(arena_index);
  if (auto ret = mallctl("arenas.create", &arena_index, &len, nullptr, 0)) {
    print_error(ret, "Unable to create arena");
    return 0;
  }

  std::ostringstream key;
  key << "arena." << arena_index << ".extent_hooks";
  extent_hooks_t* hooks;
  len = sizeof(hooks);
  // Read the existing hooks
  if (auto ret = mallctl(key.str().c_str(), &hooks, &len, nullptr, 0)) {
    print_error(ret, "Unable to get the hooks");
    return 0;
  }
  originalAlloc_ = hooks->alloc;

  // Set the custom hook
  extentHooks_ = *hooks;
  extentHooks_.alloc = &allocHook;
  extent_hooks_t* new_hooks = &extentHooks_;
  if (auto ret = mallctl(
          key.str().c_str(), nullptr, nullptr, &new_hooks, sizeof(new_hooks))) {
    print_error(ret, "Unable to set the hooks");
    return 0;
  }

  start_ = freePtr_ = map_pages(nr_pages);
  if (start_ == 0) {
    return 0;
  }
  end_ = start_ + (nr_pages * kHugePageSize);
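  // MALLOCX_TCACHE_NONE bypasses jemalloc's thread caches, which are shared
  // across arenas; this keeps allocations made with these flags from being
  // served by cached memory that did not come from the huge page arena.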
  return MALLOCX_ARENA(arena_index) | MALLOCX_TCACHE_NONE;
}
void* HugePageArena::reserve(size_t size, size_t alignment) {
  VLOG(1) << "Reserve: " << size << " alignment " << alignment;
  uintptr_t res = align_up(freePtr_, alignment);
  uintptr_t newFreePtr = res + size;
  if (newFreePtr > end_) {
    LOG(WARNING) << "Request of size " << size << " denied: " << freeSpace()
                 << " bytes available - not backed by huge pages";
    return nullptr;
  }
  freePtr_ = newFreePtr;
  return reinterpret_cast<void*>(res);
}
} // namespace
int JemallocHugePageAllocator::flags_{0};
bool JemallocHugePageAllocator::init(int nr_pages) {
  if (!usingJEMalloc()) {
    LOG(ERROR) << "Not linked with jemalloc?";
    hugePagesSupported = false;
  }
  if (hugePagesSupported) {
    if (flags_ == 0) {
      flags_ = arena.init(nr_pages);
    } else {
      LOG(WARNING) << "Already initialized";
    }
  } else {
    LOG(WARNING) << "Huge Page Allocator not supported";
  }
  return flags_ != 0;
}
size_t JemallocHugePageAllocator::freeSpace() {
  return arena.freeSpace();
}

bool JemallocHugePageAllocator::addressInArena(void* address) {
  return arena.addressInArena(address);
}
} // namespace folly
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html
#pragma once
#include <folly/CPortability.h>
#include <folly/memory/Malloc.h>
#include <folly/portability/Config.h>
#include <folly/portability/Memory.h>
#include <folly/portability/SysMman.h>
#include <cstddef>
#include <cstdint>
namespace folly {
/**
 * An allocator which uses Jemalloc to create a dedicated huge page arena,
 * backed by 2MB huge pages (on Linux x86-64).
*
 * This allocator is specifically intended for Linux with transparent
 * huge page support set to 'madvise' and the defrag policy set to 'madvise'
 * or 'defer+madvise'.
 * These can be controlled via /sys/kernel/mm/transparent_hugepage/enabled
 * and /sys/kernel/mm/transparent_hugepage/defrag.
*
* The allocator reserves a fixed-size area using mmap, and sets the
* MADV_HUGEPAGE page attribute using the madvise system call.
* A custom jemalloc hook is installed which is called when creating a new
* extent of memory. This will allocate from the reserved area if possible,
* and otherwise fall back to the default method.
* Jemalloc does not use allocated extents across different arenas without
* first unmapping them, and the advice flags are cleared on munmap.
* A regular malloc will never end up allocating memory from this arena.
*
 * If the binary isn't linked with jemalloc, the logic falls back to malloc /
 * free.
*
 * Note that the madvise call does not guarantee huge pages; it is best effort.
*
* 1GB Huge Pages are not supported at this point.
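 *
 * Example usage (a minimal sketch; init() returns false and allocations
 * silently fall back to regular pages when huge pages are unavailable):
 *
 *   folly::JemallocHugePageAllocator::init(16); // reserve 16 x 2MB pages
 *   void* p = folly::JemallocHugePageAllocator::allocate(1024);
 *   // ... use p ...
 *   folly::JemallocHugePageAllocator::deallocate(p);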
*/
class JemallocHugePageAllocator {
 public:
  static bool init(int nr_pages);

  static void* allocate(size_t size) {
    // If uninitialized, flags_ will be 0 and the mallocx behavior
    // will match that of a regular malloc
    return hugePagesSupported ? mallocx(size, flags_) : malloc(size);
  }

  static void* reallocate(void* p, size_t size) {
    return hugePagesSupported ? rallocx(p, size, flags_) : realloc(p, size);
  }

  static void deallocate(void* p, size_t = 0) {
    hugePagesSupported ? dallocx(p, flags_) : free(p);
  }

  static bool initialized() {
    return flags_ != 0;
  }

  static size_t freeSpace();
  static bool addressInArena(void* address);

 private:
  static int flags_;
  static bool hugePagesSupported;
};
// STL-compatible huge page allocator, for use with STL-style containers
template <typename T>
class CxxHugePageAllocator {
 private:
  using Self = CxxHugePageAllocator<T>;

 public:
  using value_type = T;

  CxxHugePageAllocator() {}

  template <typename U>
  explicit CxxHugePageAllocator(CxxHugePageAllocator<U> const&) {}

  T* allocate(std::size_t n) {
    return static_cast<T*>(JemallocHugePageAllocator::allocate(sizeof(T) * n));
  }
  void deallocate(T* p, std::size_t n) {
    JemallocHugePageAllocator::deallocate(p, sizeof(T) * n);
  }

  friend bool operator==(Self const&, Self const&) noexcept {
    return true;
  }
  friend bool operator!=(Self const&, Self const&) noexcept {
    return false;
  }
};
} // namespace folly
/*
* Copyright 2016-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/experimental/JemallocHugePageAllocator.h>
#include <folly/container/F14Map.h>
#include <folly/memory/Malloc.h>
#include <folly/portability/GTest.h>
#include <array>
#include <memory>
#include <vector>
using jha = folly::JemallocHugePageAllocator;
TEST(JemallocHugePageAllocatorTest, Basic) {
  EXPECT_FALSE(jha::initialized());

  // Allocation should work even if uninitialized
  auto ptr = jha::allocate(1024);
  EXPECT_NE(nullptr, ptr);
  jha::deallocate(ptr);

  bool initialized = jha::init(1);
  if (initialized) {
    EXPECT_NE(0, jha::freeSpace());
  }

  // Large enough to hold the five 400-byte arrays constructed below
  ptr = jha::allocate(2048);
  EXPECT_NE(nullptr, ptr);
  if (initialized) {
    EXPECT_TRUE(jha::addressInArena(ptr));
  }

  // Allocate some arrays on huge page
  auto array_of_arrays = new (ptr) std::array<int, 100>[5];

  if (initialized) {
    EXPECT_FALSE(jha::addressInArena(&array_of_arrays));
    EXPECT_TRUE(jha::addressInArena(&array_of_arrays[0]));
    EXPECT_TRUE(jha::addressInArena(&array_of_arrays[0][0]));
  }

  jha::deallocate(ptr);
}
TEST(JemallocHugePageAllocatorTest, LargeAllocations) {
  // Allocate before init - will not use huge pages
  void* ptr0 = jha::allocate(3 * 1024 * 512);

  // One 2MB huge page
  bool initialized = jha::init(1);
  if (initialized) {
    EXPECT_NE(0, jha::freeSpace());
  }

  // This fits
  void* ptr1 = jha::allocate(3 * 1024 * 512);
  EXPECT_NE(nullptr, ptr1);
  if (initialized) {
    EXPECT_TRUE(jha::addressInArena(ptr1));
  }

  // This is too large to fit
  void* ptr2 = jha::allocate(4 * 1024 * 1024);
  EXPECT_NE(nullptr, ptr2);
  EXPECT_FALSE(jha::addressInArena(ptr2));

  // Free and reuse huge page area
  jha::deallocate(ptr2);
  jha::deallocate(ptr0);
  ptr2 = jha::allocate(1024 * 1024);

  // No memory in the huge page arena was freed - ptr0 was allocated
  // before init and ptr2 didn't fit
  EXPECT_FALSE(jha::addressInArena(ptr2));

  jha::deallocate(ptr1);
  ptr1 = jha::allocate(3 * 1024 * 512);
  EXPECT_NE(nullptr, ptr1);
  if (initialized) {
    EXPECT_TRUE(jha::addressInArena(ptr1));
  }

  // Just using free works equally well
  free(ptr1);
  ptr1 = jha::allocate(3 * 1024 * 512);
  EXPECT_NE(nullptr, ptr1);
  if (initialized) {
    EXPECT_TRUE(jha::addressInArena(ptr1));
  }

  jha::deallocate(ptr1);
  jha::deallocate(ptr2);
}
TEST(JemallocHugePageAllocatorTest, STLAllocator) {
  using MyVecAllocator = folly::CxxHugePageAllocator<int>;
  using MyVec = std::vector<int, MyVecAllocator>;

  using MyMapAllocator =
      folly::CxxHugePageAllocator<folly::f14::detail::MapValueType<int, MyVec>>;
  using MyMap = folly::F14FastMap<
      int,
      MyVec,
      folly::f14::DefaultHasher<int>,
      folly::f14::DefaultKeyEqual<int>,
      MyMapAllocator>;

  MyVec vec;
  // This should work, just won't get huge pages since
  // init hasn't been called yet
  vec.reserve(100);
  EXPECT_NE(nullptr, &vec[0]);

  // Reserve & initialize, not on huge pages
  MyVec vec2(100);
  EXPECT_NE(nullptr, &vec2[0]);

  // F14 maps need quite a lot of memory by default
  bool initialized = jha::init(4);
  if (initialized) {
    EXPECT_NE(0, jha::freeSpace());
  }

  // Reallocate, this time on huge pages
  vec.reserve(200);
  EXPECT_NE(nullptr, &vec[0]);

  MyMap map1;
  map1[0] = {1, 2, 3};
  auto map2_ptr = std::make_unique<MyMap>();
  MyMap& map2 = *map2_ptr;
  map2[0] = {1, 2, 3};

  if (initialized) {
    EXPECT_TRUE(jha::addressInArena(&vec[0]));
    EXPECT_TRUE(jha::addressInArena(&map1[0]));
    EXPECT_TRUE(jha::addressInArena(&map1[0][0]));
    EXPECT_TRUE(jha::addressInArena(&map2[0]));
    EXPECT_TRUE(jha::addressInArena(&map2[0][0]));
  }

  // This will be on the huge page arena
  map1[0] = std::move(vec);
  // But not this, since vec2 content was allocated before init
  map1[1] = std::move(vec2);

  if (initialized) {
    EXPECT_TRUE(jha::addressInArena(&map1[0]));
    EXPECT_TRUE(jha::addressInArena(&map1[1]));
    EXPECT_TRUE(jha::addressInArena(&map1[0][0]));
    EXPECT_FALSE(jha::addressInArena(&map1[1][0]));
  }

  // realloc on huge pages
  map1[1].reserve(200);
  if (initialized) {
    EXPECT_TRUE(jha::addressInArena(&map1[1][0]));
  }
}