Commit a05360ec authored by Brandon Schlinker's avatar Brandon Schlinker Committed by Facebook GitHub Bot

Enable observers to request socket timestamps

Summary:
D24094832 (https://github.com/facebook/folly/commit/842ecea531e8d6a90559f213be3793f7cd36781b) added `ByteEvent` support to `AsyncSocket`, making it easier to use socket timestamps for SCHED/TX/ACK events. With D24094832 (https://github.com/facebook/folly/commit/842ecea531e8d6a90559f213be3793f7cd36781b):
- An application can request socket timestamps by installing an observer with `ByteEvents` enabled, and then writing to the socket with a relevant timestamping flag (e.g., `TIMESTAMP_TX`, `TIMESTAMP_ACK`).
- Timestamps are delivered to the observer via the `byteEvent` callback.

This diff enables *observers* to request socket timestamping by interposing between the application and the socket by way of the `prewrite` event:
- Each time bytes from the application are about to be written to the underlying raw socket / FD, `AsyncSocket` will give observers an opportunity to request timestamping via a `prewrite` event.
- If an observer wishes to request timestamping, it can return a `PrewriteRequest` with information about the `WriteFlags` to add.
- If an observer wishes to timestamp a specific byte (first byte, every 1000th byte, etc.), it can request this with the `maybeOffsetToSplitWrite` field — socket timestamp requests apply to the *last byte* in the buffer being written, and thus if an observer wants to timestamp a specific byte, the buffer must be split so that the byte to timestamp is the final byte. The `AsyncSocket` implementation handles this split on behalf of the observer and adds `WriteFlags::CORK` (triggering `MSG_MORE`) where appropriate.
- If multiple observers are attached, `PrewriteRequests` are combined so that all observer needs are satisfied. In addition, `WriteFlags` set by the application and `WriteFlags` set by observers are combined during processing of `PrewriteRequests`.

Reviewed By: yfeldblum

Differential Revision: D24976575

fbshipit-source-id: 885720173d4a9ceefebc929a86d5e0f10f8889c4
parent 5c4c45a4
This diff is collapsed.
......@@ -1178,6 +1178,17 @@ class AsyncSocket : public AsyncTransport {
FOLLY_NODISCARD virtual std::vector<AsyncTransport::LifecycleObserver*>
getLifecycleObservers() const override;
/**
* Split iovec array at given byte offsets; produce a new array with result.
*
* Reads from srcVec / srcCount and writes the result through dstVec and
* dstCount (the only non-const parameters).
*
* NOTE(review): this declaration does not state whether endOffset is
* inclusive or exclusive, nor whether dstCount is also read on entry (e.g.,
* as the capacity of dstVec) -- confirm against the definition and document.
*/
static void splitIovecArray(
const size_t startOffset,
const size_t endOffset,
const iovec* srcVec,
const size_t srcCount,
iovec* dstVec,
size_t& dstCount);
protected:
enum ReadResultEnum {
READ_EOF = 0,
......
......@@ -798,32 +798,47 @@ class AsyncTransport : public DelayedDestruction,
* Structure used to communicate ByteEvents, such as TX and ACK timestamps.
*/
struct ByteEvent {
enum Type : uint8_t { WRITE = 1, SCHED = 2, TX = 3, ACK = 4 };
// types of events; start from 0 to enable indexing in arrays
enum Type : uint8_t {
WRITE = 0,
SCHED = 1,
TX = 2,
ACK = 3,
};
// type
Type type;
// offset of corresponding byte in raw byte stream
uint64_t offset{0};
size_t offset{0};
// transport timestamp, as recorded by AsyncTransport implementation
std::chrono::steady_clock::time_point ts = {
std::chrono::steady_clock::now()};
// kernel software timestamp; for Linux this is CLOCK_REALTIME
// kernel software timestamp for non-WRITE events; for Linux this is CLOCK_REALTIME
// see https://www.kernel.org/doc/Documentation/networking/timestamping.txt
folly::Optional<std::chrono::nanoseconds> maybeSoftwareTs;
// hardware timestamp; see kernel documentation
// hardware timestamp for non-WRITE events; see kernel documentation
// see https://www.kernel.org/doc/Documentation/networking/timestamping.txt
folly::Optional<std::chrono::nanoseconds> maybeHardwareTs;
// for WRITE events, the number of raw bytes written to the socket
// optional to prevent accidental misuse in other event types
folly::Optional<size_t> maybeRawBytesWritten;
// for WRITE events, the number of raw bytes we tried to write to the socket
// optional to prevent accidental misuse in other event types
folly::Optional<size_t> maybeRawBytesTriedToWrite;
// for WRITE ByteEvents, additional WriteFlags passed
// optional to prevent accidental misuse in other event types
folly::Optional<WriteFlags> maybeWriteFlags;
/**
* For WRITE events, returns if SCHED timestamp requested.
*/
bool schedTimestampRequested() const {
bool schedTimestampRequestedOnWrite() const {
CHECK_EQ(Type::WRITE, type);
CHECK(maybeWriteFlags.has_value());
return isSet(*maybeWriteFlags, WriteFlags::TIMESTAMP_SCHED);
......@@ -832,7 +847,7 @@ class AsyncTransport : public DelayedDestruction,
/**
* For WRITE events, returns if TX timestamp requested.
*/
bool txTimestampRequested() const {
bool txTimestampRequestedOnWrite() const {
CHECK_EQ(Type::WRITE, type);
CHECK(maybeWriteFlags.has_value());
return isSet(*maybeWriteFlags, WriteFlags::TIMESTAMP_TX);
......@@ -841,7 +856,7 @@ class AsyncTransport : public DelayedDestruction,
/**
* For WRITE events, returns if ACK timestamp requested.
*/
bool ackTimestampRequested() const {
bool ackTimestampRequestedOnWrite() const {
CHECK_EQ(Type::WRITE, type);
CHECK(maybeWriteFlags.has_value());
return isSet(*maybeWriteFlags, WriteFlags::TIMESTAMP_ACK);
......@@ -861,8 +876,49 @@ class AsyncTransport : public DelayedDestruction,
* when observers are added / removed, based on the observer configuration.
*/
struct Config {
// enables full support for ByteEvents
// receive ByteEvents
bool byteEvents{false};
// observer is notified during prewrite stage and can add WriteFlags
bool prewrite{false};
};
/**
* Information provided to observer during prewrite event.
*
* Based on this information, an observer can build a PrewriteRequest.
*/
struct PrewriteState {
// raw byte stream offsets
size_t startOffset{0};
size_t endOffset{0};
// flags already set
WriteFlags writeFlags{WriteFlags::NONE};
// transport timestamp, as recorded by AsyncTransport implementation
//
// supports sequencing of PrewriteState events and ByteEvents for debug
std::chrono::steady_clock::time_point ts = {
std::chrono::steady_clock::now()};
};
/**
* Request that can be generated by observer in response to prewrite event.
*
* An observer can use a PrewriteRequest to request WriteFlags to be added
* to a write and/or to request that the write be split up, both of which
* can be used for timestamping.
*/
struct PrewriteRequest {
// offset to split write at; may be split at earlier offset by another req
folly::Optional<size_t> maybeOffsetToSplitWrite;
// write flags to be added if write split at requested offset
WriteFlags writeFlagsToAddAtOffset{WriteFlags::NONE};
// write flags to be added regardless of where write happens
WriteFlags writeFlagsToAdd{WriteFlags::NONE};
};
/**
......@@ -881,7 +937,9 @@ class AsyncTransport : public DelayedDestruction,
virtual ~LifecycleObserver() = default;
/**
* Returns observers configuration.
* Returns observer's configuration.
*
* @return Observer configuration.
*/
const Config& getConfig() { return observerConfig_; }
......@@ -998,6 +1056,33 @@ class AsyncTransport : public DelayedDestruction,
AsyncTransport* /* transport */,
const AsyncSocketException& /* ex */) noexcept {}
/**
* Invoked before each write to the transport if prewrite support enabled.
*
* The observer receives information about the pending write in the
* PrewriteState and can request ByteEvents / socket timestamps by returning
* a PrewriteRequest. The request contains the offset to split the write at
* (if any) and WriteFlags to apply.
*
* PrewriteRequests are aggregated across observers. The write buffer is
* split at the lowest offset returned by all observers. Flags are applied
* based on configuration within the PrewriteRequest. Requests are not
* sticky and expire after each write.
*
* Fewer bytes may be written than indicated in the PrewriteState or in the
* PrewriteRequest split if the underlying transport / socket / kernel
* blocks on write.
*
* The default implementation does not return a request; it calls
* folly::terminate_with<std::runtime_error>. Observers that enable prewrite
* in their Config are expected to override this method.
*
* @param transport Transport that the pending write will be issued on.
* @param state Pending write start and end offsets and flags.
* @return Request containing offset to split write at and flags.
*/
virtual PrewriteRequest prewrite(
AsyncTransport* /* transport */, const PrewriteState& /* state */) {
// reaching this body means prewrite() was invoked without an override
folly::terminate_with<std::runtime_error>(
"prewrite() called but not defined");
}
protected:
// observer configuration; cannot be changed post instantiation
const Config observerConfig_;
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -73,6 +73,17 @@ class BlockingSocket : public folly::AsyncSocket::ConnectCallback,
return folly::to_narrow(folly::to_signed(len));
}
/**
* Write an iovec array to the socket and run the event loop.
*
* Calls writev() on the underlying socket with this object as the callback,
* runs eventBase_.loop(), and then throws the stored error if err_ holds a
* value.
*
* @param vec Array of iovec entries to write.
* @param count Count passed through to the socket's writev() with vec.
* @param flags WriteFlags passed through to the socket; defaults to NONE.
*/
void writev(
const iovec* vec,
size_t count,
folly::WriteFlags flags = folly::WriteFlags::NONE) {
sock_->writev(this, vec, count, flags);
eventBase_.loop();
// surface any error recorded in err_ while the loop ran
if (err_.has_value()) {
throw err_.value();
}
}
void flush() {}
int32_t readAll(uint8_t* buf, size_t len) {
......@@ -81,6 +92,10 @@ class BlockingSocket : public folly::AsyncSocket::ConnectCallback,
int32_t read(uint8_t* buf, size_t len) { return readHelper(buf, len, false); }
// Read variant that calls readHelper with all=false and EVLOOP_NONBLOCK as
// the flags argument, which readHelper forwards to eventBase_.loopOnce().
int32_t readNoBlock(uint8_t* buf, size_t len) {
return readHelper(buf, len, false, EVLOOP_NONBLOCK);
}
// Returns the NetworkSocket reported by the wrapped socket (sock_).
folly::NetworkSocket getNetworkSocket() const {
return sock_->getNetworkSocket();
}
......@@ -125,16 +140,15 @@ class BlockingSocket : public folly::AsyncSocket::ConnectCallback,
err_ = ex;
}
int32_t readHelper(uint8_t* buf, size_t len, bool all) {
int32_t readHelper(uint8_t* buf, size_t len, bool all, int flags = 0) {
if (!sock_->good()) {
return 0;
}
readBuf_ = buf;
readLen_ = len;
sock_->setReadCB(this);
while (!err_ && sock_->good() && readLen_ > 0) {
eventBase_.loopOnce();
eventBase_.loopOnce(flags);
if (!all) {
break;
}
......
......@@ -40,6 +40,8 @@ class MockAsyncTransportLifecycleObserver
MOCK_METHOD2(
byteEventsUnavailableMock,
void(AsyncTransport*, const AsyncSocketException&));
MOCK_METHOD2(
prewriteMock, PrewriteRequest(AsyncTransport*, const PrewriteState&));
private:
void observerAttach(AsyncTransport* trans) noexcept override {
......@@ -69,6 +71,10 @@ class MockAsyncTransportLifecycleObserver
AsyncTransport* trans, const AsyncSocketException& ex) noexcept override {
byteEventsUnavailableMock(trans, ex);
}
// Forwards the prewrite callback to prewriteMock and returns its result.
PrewriteRequest prewrite(
AsyncTransport* trans, const PrewriteState& state) noexcept override {
return prewriteMock(trans, state);
}
};
/**
......@@ -106,6 +112,10 @@ class MockAsyncTransportObserverForByteEvents
transport->addLifecycleObserver(this);
}
// Returns a const reference to the byteEvents_ member; no copy is made.
const std::vector<AsyncTransport::ByteEvent>& getByteEvents() {
return byteEvents_;
}
folly::Optional<AsyncTransport::ByteEvent> getByteEventReceivedWithOffset(
const uint64_t offset, const AsyncTransport::ByteEvent::Type type) {
for (const auto& byteEvent : byteEvents_) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment