Commit 68a78d99 authored by Brandon Schlinker's avatar Brandon Schlinker Committed by Facebook GitHub Bot

TcpInfo, an abstraction layer to capture and access TCP state

Summary:
A cross-platform abstraction layer for capturing current TCP and congestion control state.

Fetches information from four different resources:
- `TCP_INFO` (state of TCP)
- `TCP_CONGESTION` (name of congestion control algorithm)
- `TCP_CC_INFO` (details for a given congestion control algorithm)
- `SIOCOUTQ`/`SIOCINQ` (socket buffers)

`TcpInfo` is designed to solve two problems:

**(1) `TcpInfo` unblocks use of the latest `tcp_info` struct and related structs.**

As of 2020, the `tcp_info` struct shipped with glibc (sysdeps/gnu/netinet/tcp.h) has not been updated since 2007 due to compatibility concerns; see commit titled "Update netinet/tcp.h from Linux 4.18" in glibc repository. This creates scenarios where fields that have long been available in the kernel ABI cannot be accessed.

Even if glibc does eventually update the `tcp_info` shipped, we don't want to be limited to their update cycle. `TcpInfo` solves this in two ways:
   - First, `TcpInfoTypes.h` contains a copy of the latest `tcp_info` struct for Linux, and `TcpInfo` always uses this struct for lookups; this decouples `TcpInfo` from glibc's / the platform's `tcp_info`.
   - Second, `TcpInfo` determines which fields in the struct are populated (and thus valid) based on the number of bytes the kernel ABI copies into the struct during the corresponding getsockopt operation. When a field is accessed through `getFieldAsOptUInt64` or through an accessor, `TcpInfo` returns an empty optional if the field is unavailable at run-time.

In this manner, `TcpInfo` enables the latest struct to always be used while ensuring that programs can determine at runtime which fields are available for use --- there's no risk of a program assuming that a field is valid when it in fact was never initialized/set by the ABI.

**(2) `TcpInfo` abstracts platform differences while still keeping details available.**

The `tcp_info` structure varies significantly between Apple and Linux. `TcpInfo` exposes a subset of `tcp_info` and other fields through accessors that hide these differences, and reduce potential errors (e.g., Apple stores srtt in milliseconds, Linux stores in microseconds, `TcpInfo::srtt` does the conversions needed to always return in microseconds). When a field is unavailable on a platform, the accessor returns an empty optional.

In parallel, the underlying structures remain accessible and can be safely accessed through the appropriate `getFieldAsOptUInt64(...)`. This enables platform-specific code to have full access to the underlying structure while also benefiting from `TcpInfo`'s knowledge of whether a given field was populated by the ABI at run-time.

Support for FreeBSD will be added in a subsequent diff.

Differential Revision: D22134355

fbshipit-source-id: accae8762aa88c187cc473b8121df901c6ffb456
parent 16ac56e4
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#if defined(__linux__) || defined(__APPLE__)
#include <netinet/tcp.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#if defined(__linux__)
#include <asm/types.h>
#endif
namespace folly {
namespace tcpinfo {
/**
*
* tcp_info structures.
*
*/
#if defined(__linux__)
/* Feature-test macro: set to 1 on platforms where this header provides a
 * tcp_info type inside folly::tcpinfo. */
#define FOLLY_HAVE_TCP_INFO 1
/* Socket option constant associated with the tcp_info structure on Linux
 * (the value of the platform's TCP_INFO macro). */
const int tcp_info_sock_opt = TCP_INFO;
/**
* tcp_info as of kernel 5.7.
*
* The kernel ABI is fully backwards compatible. Thus, if a new structure has
* been released, this structure can (and should) be upgraded.
*
* Having a copy of the latest available structure decouples compilation from
* whatever is in the header files available to the compiler. These may be
* very outdated; see discussion of glibc below. TcpInfo determines which
* fields are supported by the kernel running on the machine based on the size
* of the tcp_info object returned and exposes only those fields.
*/
struct tcp_info {
  /*
   * This is a local copy of the Linux kernel's tcp_info layout. Field order,
   * types and bit-field widths define the binary layout that the kernel
   * fills in, so members must only ever be appended or added to spare bits,
   * never reordered or resized.
   */
  __u8 tcpi_state;
  __u8 tcpi_ca_state;
  __u8 tcpi_retransmits;
  __u8 tcpi_probes;
  __u8 tcpi_backoff;
  __u8 tcpi_options;
  /* Send and receive window-scale values share one byte, 4 bits each. */
  __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
  /*
   * Both bit-fields below live in the same byte. tcpi_fastopen_client_fail
   * is part of the kernel's tcp_info in the 5.7 headers this struct tracks;
   * declaring it here occupies previously unused bits and therefore leaves
   * the size of the struct and the offset of every other member unchanged.
   */
  __u8 tcpi_delivery_rate_app_limited : 1, tcpi_fastopen_client_fail : 2;

  __u32 tcpi_rto;
  __u32 tcpi_ato;
  __u32 tcpi_snd_mss;
  __u32 tcpi_rcv_mss;

  __u32 tcpi_unacked;
  __u32 tcpi_sacked;
  __u32 tcpi_lost;
  __u32 tcpi_retrans;
  __u32 tcpi_fackets;

  /* Times. */
  __u32 tcpi_last_data_sent;
  __u32 tcpi_last_ack_sent; /* Not remembered, sorry. */
  __u32 tcpi_last_data_recv;
  __u32 tcpi_last_ack_recv;

  /* Metrics. */
  __u32 tcpi_pmtu;
  __u32 tcpi_rcv_ssthresh;
  __u32 tcpi_rtt;
  __u32 tcpi_rttvar;
  __u32 tcpi_snd_ssthresh;
  __u32 tcpi_snd_cwnd;
  __u32 tcpi_advmss;
  __u32 tcpi_reordering;

  __u32 tcpi_rcv_rtt;
  __u32 tcpi_rcv_space;

  /*
   * tcpi_total_retrans is the last member that also exists in
   * tcp_info_legacy; everything after it is only present in newer layouts.
   */
  __u32 tcpi_total_retrans;

  __u64 tcpi_pacing_rate;
  __u64 tcpi_max_pacing_rate;
  __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
  __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
  __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */
  __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */

  __u32 tcpi_notsent_bytes;
  __u32 tcpi_min_rtt;
  __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */
  __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */

  __u64 tcpi_delivery_rate;

  __u64 tcpi_busy_time; /* Time (usec) busy sending data */
  __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */
  __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */

  __u32 tcpi_delivered;
  __u32 tcpi_delivered_ce;

  __u64 tcpi_bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
  __u64 tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */
  __u32 tcpi_dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups */
  __u32 tcpi_reord_seen; /* reordering events seen */

  __u32 tcpi_rcv_ooopack; /* Out-of-order packets received */

  __u32 tcpi_snd_wnd; /* peer's advertised receive window after
                       * scaling (bytes)
                       */
};
/**
* Legacy tcp_info used to confirm backwards compatibility.
*
* We use this structure in test cases where the kernel has an older version of
* tcp_info to verify that the wrapper returns unsupported fields as empty
* optionals.
*
* This tcp_info struct is what shipped in 3.x kernels, and is still shipped
* with glibc as of 2020 in sysdeps/gnu/netinet/tcp.h. glibc has not updated the
* tcp_info struct since 2007 due to compatibility concerns; see commit titled
* "Update netinet/tcp.h from Linux 4.18" in glibc repository.
*/
struct tcp_info_legacy {
/*
 * Member names, types and order match the leading members of tcp_info up to
 * and including tcpi_total_retrans, with one difference: this older layout
 * has no tcpi_delivery_rate_app_limited bit-field after the window-scale
 * byte. None of the members that follow tcpi_total_retrans in tcp_info
 * exist here.
 */
__u8 tcpi_state;
__u8 tcpi_ca_state;
__u8 tcpi_retransmits;
__u8 tcpi_probes;
__u8 tcpi_backoff;
__u8 tcpi_options;
/* Send and receive window-scale values share one byte, 4 bits each. */
__u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
__u32 tcpi_rto;
__u32 tcpi_ato;
__u32 tcpi_snd_mss;
__u32 tcpi_rcv_mss;
__u32 tcpi_unacked;
__u32 tcpi_sacked;
__u32 tcpi_lost;
__u32 tcpi_retrans;
__u32 tcpi_fackets;
/* Times. */
__u32 tcpi_last_data_sent;
__u32 tcpi_last_ack_sent; /* Not remembered, sorry. */
__u32 tcpi_last_data_recv;
__u32 tcpi_last_ack_recv;
/* Metrics. */
__u32 tcpi_pmtu;
__u32 tcpi_rcv_ssthresh;
__u32 tcpi_rtt;
__u32 tcpi_rttvar;
__u32 tcpi_snd_ssthresh;
__u32 tcpi_snd_cwnd;
__u32 tcpi_advmss;
__u32 tcpi_reordering;
__u32 tcpi_rcv_rtt;
__u32 tcpi_rcv_space;
/* Final member of the legacy layout. */
__u32 tcpi_total_retrans;
};
#elif defined(__APPLE__)
/* Feature-test macro: set to 1 on platforms where this header provides a
 * tcp_info type inside folly::tcpinfo. */
#define FOLLY_HAVE_TCP_INFO 1
/* On Apple platforms tcp_info is an alias for the system-provided
 * tcp_connection_info struct rather than a locally declared copy. */
using tcp_info = ::tcp_connection_info;
/* Socket option constant associated with the tcp_info structure on Apple
 * platforms (the value of the platform's TCP_CONNECTION_INFO macro). */
const int tcp_info_sock_opt = TCP_CONNECTION_INFO;
#endif
/**
* extra structures used to communicate congestion control information.
*/
#if defined(__linux__) && defined(TCP_CONGESTION) && defined(TCP_CC_INFO)
/* Feature-test macro: set to 1 when the congestion-control info structures
 * (tcpvegas_info, tcp_dctcp_info, tcp_bbr_info, tcp_cc_info) are declared,
 * i.e. on Linux when both TCP_CONGESTION and TCP_CC_INFO are defined. */
#define FOLLY_HAVE_TCP_CC_INFO 1
/*
 * Algorithm-specific details stored in the `vegas` member of tcp_cc_info.
 *
 * NOTE(review): presumably mirrors the kernel structure of the same name;
 * confirm field meanings and units against the kernel headers before
 * relying on them.
 */
struct tcpvegas_info {
__u32 tcpv_enabled;
__u32 tcpv_rttcnt;
__u32 tcpv_rtt;
__u32 tcpv_minrtt;
};
/*
 * Algorithm-specific details stored in the `dctcp` member of tcp_cc_info.
 *
 * NOTE(review): presumably mirrors the kernel structure of the same name;
 * confirm field meanings and units against the kernel headers before
 * relying on them.
 */
struct tcp_dctcp_info {
__u16 dctcp_enabled;
__u16 dctcp_ce_state;
__u32 dctcp_alpha;
__u32 dctcp_ab_ecn;
__u32 dctcp_ab_tot;
};
/*
 * Algorithm-specific details stored in the `bbr` member of tcp_cc_info.
 */
struct tcp_bbr_info {
/*
 * 64-bit max-filtered bandwidth (app throughput) estimate in bytes per
 * second, split across two 32-bit halves; the full value is
 * ((u64)bbr_bw_hi << 32) | bbr_bw_lo.
 */
__u32 bbr_bw_lo; /* lower 32 bits of bw */
__u32 bbr_bw_hi; /* upper 32 bits of bw */
__u32 bbr_min_rtt; /* min-filtered RTT in microseconds */
__u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */
__u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
};
/*
 * Overlapping storage for exactly one algorithm-specific info structure.
 *
 * NOTE(review): nothing in the union records which member is valid;
 * presumably the caller selects it from the congestion control algorithm
 * name (TCP_CONGESTION) — confirm against the code that fills this in.
 */
union tcp_cc_info {
struct tcpvegas_info vegas;
struct tcp_dctcp_info dctcp;
struct tcp_bbr_info bbr;
};
#endif
} // namespace tcpinfo
} // namespace folly
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment