Commit 59c0f069 authored by Dan Melnic's avatar Dan Melnic Committed by Facebook Github Bot

Add support for io_uring based EventBase backend

Summary: Add support for io_uring based EventBase backend

Reviewed By: kevin-vigor

Differential Revision: D18682173

fbshipit-source-id: d6e02879e2b493680caaa3097d75970d0b5d7d07
parent cfa22893
......@@ -206,17 +206,21 @@ endif()
if (NOT ${LIBURING_FOUND})
list(REMOVE_ITEM files
${FOLLY_DIR}/experimental/io/IoUring.cpp
${FOLLY_DIR}/experimental/io/IoUringBackend.cpp
)
list(REMOVE_ITEM hfiles
${FOLLY_DIR}/experimental/io/IoUring.h
${FOLLY_DIR}/experimental/io/IoUringBackend.h
)
endif()
if (NOT ${LIBAIO_FOUND} AND NOT ${LIBURING_FOUND})
list(REMOVE_ITEM files
${FOLLY_DIR}/experimental/io/AsyncBase.cpp
${FOLLY_DIR}/experimental/io/PollIoBackend.cpp
)
list(REMOVE_ITEM hfiles
${FOLLY_DIR}/experimental/io/AsyncBase.h
${FOLLY_DIR}/experimental/io/PollIoBackend.h
)
endif()
......
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/experimental/io/IoUringBackend.h>
#include <folly/Likely.h>
#include <folly/String.h>
#include <folly/portability/Sockets.h>
#include <folly/synchronization/CallOnce.h>
#include <glog/logging.h>
namespace folly {
IoUringBackend::IoUringBackend(size_t capacity, size_t maxSubmit, size_t maxGet)
: PollIoBackend(capacity, maxSubmit, maxGet) {
::memset(&ioRing_, 0, sizeof(ioRing_));
::memset(&params_, 0, sizeof(params_));
params_.flags |= IORING_SETUP_CQSIZE;
params_.cq_entries = capacity;
// allocate entries both for poll add and cancel
if (::io_uring_queue_init_params(2 * maxSubmit_, &ioRing_, &params_)) {
LOG(ERROR) << "io_uring_queue_init_params(" << 2 * maxSubmit_ << ","
<< params_.cq_entries << ") "
<< "failed errno = " << errno << ":\"" << folly::errnoStr(errno)
<< "\" " << this;
throw std::runtime_error("io_uring_queue_init error");
}
sqRingMask_ = *ioRing_.sq.kring_mask;
cqRingMask_ = *ioRing_.cq.kring_mask;
numEntries_ *= 2;
entries_.reset(new IoSqe[numEntries_]);
timerEntry_ = &entries_[0];
timerEntry_->backend_ = this;
// build the free list - first entry is the timer entry
for (size_t i = 2; i < numEntries_; i++) {
entries_[i - 1].next_ = &entries_[i];
entries_[i - 1].backend_ = this;
}
entries_[numEntries_ - 1].backend_ = this;
freeHead_ = &entries_[1];
// add the timer fd
if (!addTimerFd()) {
cleanup();
entries_.reset();
throw std::runtime_error("io_uring_submit error");
}
}
IoUringBackend::~IoUringBackend() {
shuttingDown_ = true;
cleanup();
}
void IoUringBackend::cleanup() {
if (ioRing_.ring_fd > 0) {
::io_uring_queue_exit(&ioRing_);
ioRing_.ring_fd = -1;
}
}
bool IoUringBackend::isAvailable() {
static bool sAvailable = true;
static folly::once_flag initFlag;
folly::call_once(initFlag, [&]() {
try {
IoUringBackend backend(1024, 128);
} catch (const std::runtime_error&) {
sAvailable = false;
}
});
return sAvailable;
}
void* IoUringBackend::allocSubmissionEntry() {
return ::io_uring_get_sqe(&ioRing_);
}
int IoUringBackend::submitOne(IoCb* /*unused*/) {
return submitBusyCheck();
}
int IoUringBackend::cancelOne(IoCb* ioCb) {
auto* rentry = static_cast<IoSqe*>(allocIoCb());
if (!rentry) {
return 0;
}
auto* sqe = ::io_uring_get_sqe(&ioRing_);
CHECK(sqe);
rentry->prepPollRemove(sqe, ioCb); // prev entry
int ret = submitBusyCheck();
if (ret < 0) {
// release the sqe
releaseIoCb(rentry);
}
return ret;
}
int IoUringBackend::getActiveEvents(bool waitForEvents) {
size_t i = 0;
struct io_uring_cqe* cqe = nullptr;
// we can be called from the submitList() method
if (FOLLY_LIKELY(waitForEvents)) {
::io_uring_wait_cqe(&ioRing_, &cqe);
} else {
::io_uring_peek_cqe(&ioRing_, &cqe);
}
while (cqe && (i < maxGet_)) {
i++;
IoSqe* sqe = reinterpret_cast<IoSqe*>(io_uring_cqe_get_data(cqe));
if (FOLLY_UNLIKELY(static_cast<PollIoBackend::IoCb*>(sqe) == timerEntry_)) {
// just set the flag here
processTimers_ = true;
} else {
processIoCb(sqe, cqe->res);
}
::io_uring_cqe_seen(&ioRing_, cqe);
cqe = nullptr;
::io_uring_peek_cqe(&ioRing_, &cqe);
}
return static_cast<int>(i);
}
int IoUringBackend::submitBusyCheck() {
int num;
while ((num = ::io_uring_submit(&ioRing_)) == -EBUSY) {
// if we get EBUSY, try to consume some CQ entries
getActiveEvents(false);
};
return num;
}
int IoUringBackend::submitBusyCheckAndWait() {
int num;
while ((num = ::io_uring_submit_and_wait(&ioRing_, 1)) == -EBUSY) {
// if we get EBUSY, try to consume some CQ entries
getActiveEvents(false);
};
return num;
}
size_t IoUringBackend::submitList(IoCbList& ioCbs) {
int i = 0;
size_t ret = 0;
while (!ioCbs.empty()) {
auto* entry = &ioCbs.front();
ioCbs.pop_front();
auto* sqe = ::io_uring_get_sqe(&ioRing_);
CHECK(sqe); // this should not happen
auto* ev = entry->event_->getEvent();
entry->prepPollAdd(sqe, ev->ev_fd, getPollFlags(ev->ev_events));
i++;
if (ioCbs.empty()) {
int num = submitBusyCheckAndWait();
CHECK_EQ(num, i);
ret += i;
} else {
if (static_cast<size_t>(i) == maxSubmit_) {
int num = submitBusyCheck();
CHECK_EQ(num, i);
ret += i;
i = 0;
}
}
}
return ret;
}
} // namespace folly
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
extern "C" {
#include <liburing.h>
}
#include <folly/experimental/io/PollIoBackend.h>
#include <glog/logging.h>
namespace folly {
class IoUringBackend : public PollIoBackend {
public:
explicit IoUringBackend(
size_t capacity,
size_t maxSubmit = 128,
size_t maxGet = static_cast<size_t>(-1));
~IoUringBackend() override;
// returns true if the current Linux kernel version
// supports the io_uring backend
static bool isAvailable();
protected:
// from PollIoBackend
void* allocSubmissionEntry() override;
int getActiveEvents(bool waitForEvents = true) override;
size_t submitList(IoCbList& ioCbs) override;
int submitOne(IoCb* ioCb) override;
int cancelOne(IoCb* ioCb) override;
int submitBusyCheck();
int submitBusyCheckAndWait();
struct IoSqe : public PollIoBackend::IoCb {
explicit IoSqe(PollIoBackend* backend = nullptr, bool poolAlloc = true)
: PollIoBackend::IoCb(backend, poolAlloc) {}
~IoSqe() override = default;
void prepPollAdd(void* entry, int fd, uint32_t events) override {
CHECK(entry);
struct io_uring_sqe* sqe = reinterpret_cast<struct io_uring_sqe*>(entry);
::io_uring_prep_poll_add(sqe, fd, events);
::io_uring_sqe_set_data(sqe, this);
}
FOLLY_ALWAYS_INLINE void prepPollRemove(
struct io_uring_sqe* sqe,
void* user_data) {
CHECK(sqe);
::io_uring_prep_poll_remove(sqe, user_data);
::io_uring_sqe_set_data(sqe, this);
}
};
PollIoBackend::IoCb* allocNewIoCb() override {
return new IoSqe(this, false);
}
void cleanup();
size_t submit_internal();
std::unique_ptr<IoSqe[]> entries_;
// io_uring related
struct io_uring_params params_;
struct io_uring ioRing_;
uint32_t sqRingMask_{0};
uint32_t cqRingMask_{0};
};
} // namespace folly
This diff is collapsed.
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <poll.h>
#include <sys/types.h>
#include <chrono>
#include <map>
#include <vector>
#include <boost/intrusive/list.hpp>
#include <folly/CPortability.h>
#include <folly/CppAttributes.h>
#include <folly/io/async/EventBaseBackendBase.h>
namespace folly {
class PollIoBackend : public EventBaseBackendBase {
public:
explicit PollIoBackend(size_t capacity, size_t maxSubmit, size_t maxGet);
~PollIoBackend() override;
// from EventBaseBackendBase
event_base* getEventBase() override {
return nullptr;
}
int eb_event_base_loop(int flags) override;
int eb_event_base_loopbreak() override;
int eb_event_add(Event& event, const struct timeval* timeout) override;
int eb_event_del(Event& event) override;
protected:
struct IoCb
: public boost::intrusive::list_base_hook<
boost::intrusive::link_mode<boost::intrusive::auto_unlink>> {
explicit IoCb(PollIoBackend* backend, bool poolAlloc = true)
: backend_(backend), poolAlloc_(poolAlloc) {}
virtual ~IoCb() = default;
PollIoBackend* backend_;
const bool poolAlloc_;
IoCb* next_{nullptr}; // this is for the free list
Event* event_{nullptr};
size_t useCount_{0};
FOLLY_ALWAYS_INLINE void resetEvent() {
// remove it from the list
unlink();
if (event_) {
event_->setUserData(nullptr);
event_ = nullptr;
}
}
virtual void prepPollAdd(void* entry, int fd, uint32_t events) = 0;
};
using IoCbList =
boost::intrusive::list<IoCb, boost::intrusive::constant_time_size<false>>;
struct TimerEntry {
explicit TimerEntry(Event* event) : event_(event) {}
TimerEntry(Event* event, const struct timeval& timeout);
Event* event_{nullptr};
std::chrono::time_point<std::chrono::steady_clock> expireTime_;
bool operator==(const TimerEntry& other) {
return event_ == other.event_;
}
std::chrono::microseconds getRemainingTime() const {
auto now = std::chrono::steady_clock::now();
if (expireTime_ > now) {
return std::chrono::duration_cast<std::chrono::microseconds>(
expireTime_ - now);
}
return std::chrono::microseconds(0);
}
static bool isExpired(
const std::chrono::time_point<std::chrono::steady_clock>& timestamp) {
return (std::chrono::steady_clock::now() >= timestamp);
}
void setExpireTime(const struct timeval& timeout) {
uint64_t us = static_cast<uint64_t>(timeout.tv_sec) *
static_cast<uint64_t>(1000000) +
static_cast<uint64_t>(timeout.tv_usec);
expireTime_ =
std::chrono::steady_clock::now() + std::chrono::microseconds(us);
}
};
static FOLLY_ALWAYS_INLINE uint32_t getPollFlags(short events) {
uint32_t ret = 0;
if (events & EV_READ) {
ret |= POLLIN;
}
if (events & EV_WRITE) {
ret |= POLLOUT;
}
return ret;
}
static FOLLY_ALWAYS_INLINE short getPollEvents(uint32_t flags, short events) {
short ret = 0;
if (flags & POLLIN) {
ret |= EV_READ;
}
if (flags & POLLOUT) {
ret |= EV_WRITE;
}
if (flags & (POLLERR | POLLHUP)) {
ret |= (EV_READ | EV_WRITE);
}
ret &= events;
return ret;
}
// timer processing
bool addTimerFd();
void scheduleTimeout();
void scheduleTimeout(const std::chrono::microseconds& us);
void addTimerEvent(Event& event, const struct timeval* timeout);
void removeTimerEvent(Event& event);
size_t processTimers();
size_t processActiveEvents();
void processIoCb(IoCb* ioCb, int64_t res) noexcept;
IoCb* FOLLY_NULLABLE allocIoCb();
void releaseIoCb(IoCb* aioIoCb);
virtual IoCb* allocNewIoCb() = 0;
virtual void* allocSubmissionEntry() = 0;
virtual int getActiveEvents(bool waitForEvents = true) = 0;
virtual size_t submitList(IoCbList& ioCbs) = 0;
virtual int submitOne(IoCb* ioCb) = 0;
virtual int cancelOne(IoCb* ioCb) = 0;
int eb_event_modify_inserted(Event& event, IoCb* ioCb);
size_t capacity_;
size_t numEntries_;
IoCb* timerEntry_{nullptr};
IoCb* freeHead_{nullptr};
// timer related
int timerFd_{-1};
bool timerChanged_{false};
std::map<
std::chrono::time_point<std::chrono::steady_clock>,
std::vector<TimerEntry>>
timers_;
std::map<Event*, std::chrono::time_point<std::chrono::steady_clock>>
eventToTimers_;
// submit
size_t maxSubmit_;
IoCbList submitList_;
// process
size_t maxGet_;
// loop related
bool loopBreak_{false};
bool shuttingDown_{false};
bool processTimers_{false};
size_t numInsertedEvents_{0};
IoCbList activeEvents_;
};
} // namespace folly
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/eventfd.h>
#include <folly/FileUtil.h>
#include <folly/experimental/io/IoUringBackend.h>
#include <folly/init/Init.h>
#include <folly/io/async/EventHandler.h>
#include <folly/io/async/test/EventBaseTestLib.h>
#include <folly/portability/GTest.h>
// IoUringBackend specific tests
namespace {
class EventFD : public folly::EventHandler {
public:
EventFD(
uint64_t num,
uint64_t& total,
bool persist,
folly::EventBase* eventBase)
: EventFD(total, createFd(num), persist, eventBase) {}
~EventFD() override {
unregisterHandler();
if (fd_ > 0) {
::close(fd_);
fd_ = -1;
}
}
// from folly::EventHandler
void handlerReady(uint16_t /*events*/) noexcept override {
// we do not read to leave the fd signalled
if (!persist_) {
registerHandler(folly::EventHandler::READ);
}
size_t data;
if (sizeof(data) == folly::readNoInt(fd_, &data, sizeof(data))) {
CHECK_EQ(data, 1);
++num_;
if (total_ > 0) {
--total_;
if (total_ == 0) {
evb_->terminateLoopSoon();
}
}
}
}
uint64_t getNum() const {
return num_;
}
private:
static int createFd(uint64_t num) {
// we want it a semaphore
int fd = ::eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE | EFD_NONBLOCK);
CHECK_GT(fd, 0);
CHECK_EQ(folly::writeNoInt(fd, &num, sizeof(num)), sizeof(num));
return fd;
}
EventFD(uint64_t& total, int fd, bool persist, folly::EventBase* eventBase)
: EventHandler(eventBase, folly::NetworkSocket::fromFd(fd)),
total_(total),
fd_(fd),
persist_(persist),
evb_(eventBase) {
if (persist_) {
registerHandler(folly::EventHandler::READ | folly::EventHandler::PERSIST);
} else {
registerHandler(folly::EventHandler::READ);
}
}
uint64_t num_{0};
uint64_t& total_;
int fd_{-1};
bool persist_;
folly::EventBase* evb_;
};
void testOverflow(bool overflow) {
static constexpr size_t kBackendCapacity = 64;
static constexpr size_t kBackendMaxSubmit = 32;
// for overflow == true we use a greater than kBackendCapacity number of
// EventFD instances and lower when overflow == false
size_t kNumEventFds = overflow ? 2048 : 32;
static constexpr size_t kEventFdCount = 16;
auto total = kNumEventFds * kEventFdCount;
folly::EventBase evb(std::make_unique<folly::IoUringBackend>(
kBackendCapacity, kBackendMaxSubmit));
std::vector<std::unique_ptr<EventFD>> eventsVec;
eventsVec.reserve(kNumEventFds);
for (size_t i = 0; i < kNumEventFds; i++) {
eventsVec.emplace_back(
std::make_unique<EventFD>(kEventFdCount, total, true, &evb));
}
evb.loopForever();
for (size_t i = 0; i < kNumEventFds; i++) {
CHECK_EQ(eventsVec[i]->getNum(), kEventFdCount);
}
}
} // namespace
TEST(IoUringBackend, NoOverflow) {
testOverflow(false);
}
TEST(IoUringBackend, Overflow) {
testOverflow(true);
}
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
folly::init(&argc, &argv);
bool avail = folly::IoUringBackend::isAvailable();
LOG(INFO) << "folly::IoUringBackend::isAvailable() returned " << avail;
if (!avail) {
LOG(INFO)
<< "Not running tests since IoUringBackend is not available on this kernel version";
return 0;
}
static constexpr size_t kCapacity = 16 * 1024;
static constexpr size_t kMaxSubmit = 128;
folly::test::EventBaseBackendProvider::GetBackendFunc func;
func = []() {
return std::make_unique<folly::IoUringBackend>(kCapacity, kMaxSubmit);
};
folly::test::EventBaseBackendProvider::setGetBackendFunc(std::move(func));
return RUN_ALL_TESTS();
}
......@@ -606,16 +606,17 @@ TEST_F(EventBaseTest, ReadWriteSimultaneous) {
TimePoint end;
// It's not strictly required that the EventBase register us about both
// events in the same call. So, it's possible that if the EventBase
// implementation changes this test could start failing, and it wouldn't be
// considered breaking the API. However for now it's nice to exercise this
// code path.
// events in the same call or thw read/write notifications are delievered at
// the same. So, it's possible that if the EventBase implementation changes
// this test could start failing, and it wouldn't be considered breaking the
// API. However for now it's nice to exercise this code path.
ASSERT_EQ(handler.log.size(), 1);
ASSERT_EQ(handler.log[0].events, EventHandler::READ | EventHandler::WRITE);
if (handler.log[0].events & EventHandler::READ) {
ASSERT_EQ(handler.log[0].bytesRead, sock0WriteLength);
ASSERT_GT(handler.log[0].bytesWritten, 0);
}
T_CHECK_TIMEOUT(
start, handler.log[0].timestamp, milliseconds(events[0].milliseconds));
ASSERT_EQ(handler.log[0].bytesRead, sock0WriteLength);
ASSERT_GT(handler.log[0].bytesWritten, 0);
T_CHECK_TIMEOUT(start, end, milliseconds(events[0].milliseconds));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment