From 7885ce0147401d5f6908ad4da660334031d836b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 31 Jan 2026 23:25:01 +0100 Subject: [PATCH 01/15] tcp: try to avoid safer when ACKs are thinned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add newly acked pkts EWMA. When ACK thinning occurs, select between safer and unsafe cep delta in AccECN processing based on it. If the packets ACKed per ACK tends to be large, don't conservatively assume ACE field overflow. This patch uses the existing 2-byte holes in the rx group for new u16 variables withtout creating more holes. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_sock { [...] u32 delivered_ecn_bytes[3]; /* 2744 12 */ /* XXX 4 bytes hole, try to pack */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2816 0 */ [...] /* size: 3264, cachelines: 51, members: 177 */ } [AFTER THIS PATCH] struct tcp_sock { [...] u32 delivered_ecn_bytes[3]; /* 2744 12 */ u16 pkts_acked_ewma; /* 2756 2 */ /* XXX 2 bytes hole, try to pack */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2816 0 */ [...] /* size: 3264, cachelines: 51, members: 178 */ } Signed-off-by: Ilpo Järvinen Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-2-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- .../networking/net_cachelines/tcp_sock.rst | 1 + include/linux/tcp.h | 1 + net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_input.c | 20 ++++++++++++++++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 26f32dbcf6ec..563daea10d6c 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -105,6 +105,7 @@ u32 received_ce read_mostly read_w u32[3] received_ecn_bytes read_mostly read_write u8:4 received_ce_pending read_mostly read_write u32[3] delivered_ecn_bytes read_write +u16 pkts_acked_ewma read_write u8:2 syn_ect_snt write_mostly read_write u8:2 syn_ect_rcv read_mostly read_write u8:2 accecn_minlen write_mostly read_write diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 367b491ddf97..fbc514d582e7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,6 +342,7 @@ struct tcp_sock { u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_rtt_last_tsecr; u32 delivered_ecn_bytes[3]; + u16 pkts_acked_ewma;/* Pkts acked EWMA for AccECN cep heuristic */ u64 first_tx_mstamp; /* start of window send phase */ u64 delivered_mstamp; /* time we reached "delivered" */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6e94c5859f4b..49f1029b5f8b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3470,6 +3470,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_accecn_init_counters(tp); tp->prev_ecnfield = 0; tp->accecn_opt_tstamp = 0; + tp->pkts_acked_ewma = 0; if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release) icsk->icsk_ca_ops->release(sk); memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); @@ -5243,6 +5244,7 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_ecn_bytes); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, pkts_acked_ewma); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a2a872382fc0..38852e04229a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -488,6 +488,10 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, tcp_count_delivered_ce(tp, delivered); } +#define PKTS_ACKED_WEIGHT 6 +#define PKTS_ACKED_PREC 6 +#define ACK_COMP_THRESH 4 + /* Returns the ECN CE delta */ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, u32 delivered_pkts, u32 delivered_bytes, @@ -499,6 +503,7 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, u32 delta, safe_delta, d_ceb; bool opt_deltas_valid; u32 corrected_ace; + u32 ewma; /* Reordered ACK or uncertain due to lack of data to send and ts */ if (!(flag & (FLAG_FORWARD_PROGRESS | FLAG_TS_PROGRESS))) @@ -507,6 +512,18 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, opt_deltas_valid = tcp_accecn_process_option(tp, skb, delivered_bytes, flag); + if (delivered_pkts) { + if (!tp->pkts_acked_ewma) { + ewma = delivered_pkts << PKTS_ACKED_PREC; + } else { + ewma = tp->pkts_acked_ewma; + ewma = (((ewma << PKTS_ACKED_WEIGHT) - ewma) + + (delivered_pkts << PKTS_ACKED_PREC)) >> + PKTS_ACKED_WEIGHT; + } + tp->pkts_acked_ewma = min_t(u32, ewma, 0xFFFFU); + } + if (!(flag & FLAG_SLOWPATH)) { /* AccECN counter might overflow on large ACKs */ if (delivered_pkts <= TCP_ACCECN_CEP_ACE_MASK) @@ -555,7 +572,8 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, if (d_ceb < safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT) return delta; - } + } else if (tp->pkts_acked_ewma > (ACK_COMP_THRESH << PKTS_ACKED_PREC)) + return delta; return safe_delta; } From ab4c8b6f7fcbca7644487e08e76e4e224414bca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 31 Jan 2026 23:25:02 +0100 Subject: [PATCH 02/15] gro: flushing when CWR is set negatively affects AccECN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As AccECN may keep CWR bit asserted due to different interpretation of the bit, flushing with GRO because of CWR may effectively disable GRO until AccECN counter field changes such that CWR-bit becomes 0. There is no harm done from not immediately forwarding the CWR'ed segment with RFC3168 ECN. Signed-off-by: Ilpo Järvinen Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-3-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- net/ipv4/tcp_offload.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 942a948f1a31..3b1fdcd3cb29 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -304,8 +304,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, goto out_check_final; th2 = tcp_hdr(p); - flush = (__force int)(flags & TCP_FLAG_CWR); - flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + flush = (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_FIN | TCP_FLAG_PSH)); flush |= (__force int)(th->ack_seq ^ th2->ack_seq); for (i = sizeof(*th); i < thlen; i += 4) From 6f74bc8b6e8d0e8218c1342682dadb156603d13e Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:03 +0100 Subject: [PATCH 03/15] selftests/net: gro: add self-test for TCP CWR flag Currently, GRO does not flush packets when the CWR bit is set. A corresponding self-test is being added, in which the CWR flag is set for two consecutive packets, but the first packet with the CWR flag set will not be flushed immediately. +===================+==========+===============+===========+ | Packet id | CWR flag | Payload | Flushing? | +===================+==========+===============+===========+ | 0 | 0 | PAYLOAD_LEN | 0 | | ... | 0 | PAYLOAD_LEN | 1 | +-------------------+----------+---------------+-----------+ | NUM_PACKETS/2 - 1 | 1 | payload_len | 0 | | NUM_PACKETS/2 | 1 | payload_len | 1 | +-------------------+----------+---------------+-----------+ | ... | 0 | PAYLOAD_LEN | 0 | | NUM_PACKETS | 0 | PAYLOAD_LEN | 1 | +===================+==========+===============+===========+ Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-4-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/gro.c | 81 ++++++++++++++++------ tools/testing/selftests/drivers/net/gro.py | 3 +- 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c index e76c618704cf..3c0745b68bfa 100644 --- a/tools/testing/selftests/drivers/net/gro.c +++ b/tools/testing/selftests/drivers/net/gro.c @@ -17,8 +17,8 @@ * Pure ACK does not coalesce. * * flags_*: - * No packets with PSH, SYN, URG, RST set will be coalesced. - * - flags_psh, flags_syn, flags_rst, flags_urg + * No packets with PSH, SYN, URG, RST, CWR set will be coalesced. + * - flags_psh, flags_syn, flags_rst, flags_urg, flags_cwr * * tcp_*: * Packets with incorrect checksum, non-consecutive seqno and @@ -360,32 +360,58 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, fill_datalinklayer(buf); } -/* send one extra flag, not first and not last pkt */ -static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, - int rst, int urg) +#ifndef TH_CWR +#define TH_CWR 0x80 +#endif +static void set_flags(struct tcphdr *tcph, int payload_len, int psh, int syn, + int rst, int urg, int cwr) { - static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - int payload_len, pkt_size, flag, i; - struct tcphdr *tcph; - - payload_len = PAYLOAD_LEN * psh; - pkt_size = total_hdr_len + payload_len; - flag = NUM_PACKETS / 2; - - create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); - - tcph = (struct tcphdr *)(flag_buf + tcp_offset); tcph->psh = psh; tcph->syn = syn; tcph->rst = rst; tcph->urg = urg; + if (cwr) + tcph->th_flags |= TH_CWR; + else + tcph->th_flags &= ~TH_CWR; tcph->check = 0; tcph->check = tcp_checksum(tcph, payload_len); +} + +/* send extra flags of the (NUM_PACKETS / 2) and (NUM_PACKETS / 2 - 1) + * pkts, not first and not last pkt + */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg, int cwr) +{ + static char flag_buf[2][MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, i; + struct tcphdr *tcph; + int flag[2]; + + payload_len = PAYLOAD_LEN * (psh || cwr); + pkt_size = total_hdr_len + payload_len; + flag[0] = NUM_PACKETS / 2; + flag[1] = NUM_PACKETS / 2 - 1; + + /* Create and configure packets with flags + */ + for (i = 0; i < 2; i++) { + if (flag[i] > 0) { + create_packet(flag_buf[i], flag[i] * payload_len, 0, + payload_len, 0); + tcph = (struct tcphdr *)(flag_buf[i] + tcp_offset); + set_flags(tcph, payload_len, psh, syn, rst, urg, cwr); + } + } for (i = 0; i < NUM_PACKETS + 1; i++) { - if (i == flag) { - write_packet(fd, flag_buf, pkt_size, daddr); + if (i == flag[0]) { + write_packet(fd, flag_buf[0], pkt_size, daddr); + continue; + } else if (i == flag[1] && cwr) { + write_packet(fd, flag_buf[1], pkt_size, daddr); continue; } create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); @@ -1068,16 +1094,19 @@ static void gro_sender(void) /* flags sub-tests */ } else if (strcmp(testname, "flags_psh") == 0) { - send_flags(txfd, &daddr, 1, 0, 0, 0); + send_flags(txfd, &daddr, 1, 0, 0, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "flags_syn") == 0) { - send_flags(txfd, &daddr, 0, 1, 0, 0); + send_flags(txfd, &daddr, 0, 1, 0, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "flags_rst") == 0) { - send_flags(txfd, &daddr, 0, 0, 1, 0); + send_flags(txfd, &daddr, 0, 0, 1, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "flags_urg") == 0) { - send_flags(txfd, &daddr, 0, 0, 0, 1); + send_flags(txfd, &daddr, 0, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags_cwr") == 0) { + send_flags(txfd, &daddr, 0, 0, 0, 0, 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); /* tcp sub-tests */ @@ -1239,6 +1268,12 @@ static void gro_receiver(void) correct_payload[2] = PAYLOAD_LEN * 2; printf("urg flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "flags_cwr") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN * 2; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("cwr flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); /* tcp sub-tests */ } else if (strcmp(testname, "tcp_csum") == 0) { diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index 1bb8af571456..cbc1b19dbc91 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -17,6 +17,7 @@ Test cases: - flags_syn: Packets with SYN flag don't coalesce - flags_rst: Packets with RST flag don't coalesce - flags_urg: Packets with URG flag don't coalesce + - flags_cwr: Packets with CWR flag don't coalesce - tcp_csum: Packets with incorrect checksum don't coalesce - tcp_seq: Packets with non-consecutive seqno don't coalesce - tcp_ts: Packets with different timestamp options don't coalesce @@ -191,7 +192,7 @@ def _gro_variants(): common_tests = [ "data_same", "data_lrg_sml", "data_sml_lrg", "ack", - "flags_psh", "flags_syn", "flags_rst", "flags_urg", + "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr", "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt", "ip_ecn", "ip_tos", "large_max", "large_rem", From 100f946b8d44b64bc0b8a8c30d283105031c0a77 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:04 +0100 Subject: [PATCH 04/15] tcp: ECT_1_NEGOTIATION and NEEDS_ACCECN identifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two flags for congestion control (CC) module are added in this patch related to AccECN negotiation. First, a new flag (TCP_CONG_NEEDS_ACCECN) defines that the CC expects to negotiate AccECN functionality using the ECE, CWR and AE flags in the TCP header. Second, during ECN negotiation, ECT(0) in the IP header is used. This patch enables CC to control whether ECT(0) or ECT(1) should be used on a per-segment basis. A new flag (TCP_CONG_ECT_1_NEGOTIATION) defines the expected ECT value in the IP header by the CA when not-yet initialized for the connection. The detailed AccECN negotiaotn can be found in IETF RFC9768. Co-developed-by: Olivier Tilmans Signed-off-by: Olivier Tilmans Signed-off-by: Ilpo Järvinen Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-5-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/inet_ecn.h | 20 +++++++++++++++++--- include/net/tcp.h | 21 ++++++++++++++++++++- include/net/tcp_ecn.h | 13 ++++++++++--- net/ipv4/tcp_cong.c | 5 +++-- net/ipv4/tcp_input.c | 3 ++- 5 files changed, 52 insertions(+), 10 deletions(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index ea32393464a2..827b87a95dab 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) return outer; } +/* Apply either ECT(0) or ECT(1) */ +static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1) +{ + __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0; + + /* Mask the complete byte in case the connection alternates between + * ECT(0) and ECT(1). + */ + inet_sk(sk)->tos &= ~INET_ECN_MASK; + inet_sk(sk)->tos |= ect; + if (inet6_sk(sk)) { + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; + inet6_sk(sk)->tclass |= ect; + } +} + static inline void INET_ECN_xmit(struct sock *sk) { - inet_sk(sk)->tos |= INET_ECN_ECT_0; - if (inet6_sk(sk) != NULL) - inet6_sk(sk)->tclass |= INET_ECN_ECT_0; + __INET_ECN_xmit(sk, false); } static inline void INET_ECN_dontxmit(struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index cecec1a92d5e..c122ff2f364b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1215,7 +1215,12 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED BIT(0) /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN BIT(1) -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Require successfully negotiated AccECN capability */ +#define TCP_CONG_NEEDS_ACCECN BIT(2) +/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */ +#define TCP_CONG_ECT_1_NEGOTIATION BIT(3) +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \ + TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION) union tcp_cc_info; @@ -1356,6 +1361,20 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } +static inline bool tcp_ca_needs_accecn(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN; +} + +static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION; +} + static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index f13e5cd2b1ac..fdde1c342b35 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -31,6 +31,12 @@ enum tcp_accecn_option { TCP_ACCECN_OPTION_FULL = 2, }; +/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ +static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk) +{ + __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk)); +} + static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) { /* Do not set CWR if in AccECN mode! */ @@ -561,7 +567,7 @@ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; else if (tcp_ca_needs_ecn(sk) || tcp_bpf_ca_needs_ecn(sk)) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) { TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; @@ -579,7 +585,8 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) bool use_ecn, use_accecn; u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); - use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN; + use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN || + tcp_ca_needs_accecn(sk); use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; @@ -595,7 +602,7 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) if (use_ecn) { if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; if (use_accecn) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index df758adbb445..e9f6c77e0631 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -16,6 +16,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(tcp_cong_list_lock); @@ -227,7 +228,7 @@ void tcp_assign_congestion_control(struct sock *sk) memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); else INET_ECN_dontxmit(sk); } @@ -257,7 +258,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) - INET_ECN_xmit(sk); + INET_ECN_xmit_ect_1_negotiation(sk); else INET_ECN_dontxmit(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 38852e04229a..e823a3b1ad3b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7495,7 +7495,8 @@ static void tcp_ecn_create_request(struct request_sock *req, u32 ecn_ok_dst; if (tcp_accecn_syn_requested(th) && - READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3) { + (READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3 || + tcp_ca_needs_accecn(listen_sk))) { inet_rsk(req)->ecn_ok = 1; tcp_rsk(req)->accecn_ok = 1; tcp_rsk(req)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & From e68c28f22f46ecfdec3656ae785dd8ccbb4d557d Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:05 +0100 Subject: [PATCH 05/15] tcp: disable RFC3168 fallback identifier for CC modules When AccECN is not successfully negociated for a TCP flow, it defaults fallback to classic ECN (RFC3168). However, L4S service will fallback to non-ECN. This patch enables congestion control module to control whether it should not fallback to classic ECN after unsuccessful AccECN negotiation. A new CA module flag (TCP_CONG_NO_FALLBACK_RFC3168) identifies this behavior expected by the CA. Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-6-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/tcp.h | 12 +++++++++++- include/net/tcp_ecn.h | 11 ++++++++--- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_minisocks.c | 7 ++++--- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index c122ff2f364b..cace4fbe38d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1219,8 +1219,11 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NEEDS_ACCECN BIT(2) /* Use ECT(1) instead of ECT(0) while the CA is uninitialized */ #define TCP_CONG_ECT_1_NEGOTIATION BIT(3) +/* Cannot fallback to RFC3168 during AccECN negotiation */ +#define TCP_CONG_NO_FALLBACK_RFC3168 BIT(4) #define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \ - TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION) + TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION | \ + TCP_CONG_NO_FALLBACK_RFC3168) union tcp_cc_info; @@ -1375,6 +1378,13 @@ static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION; } +static inline bool tcp_ca_no_fallback_rfc3168(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_NO_FALLBACK_RFC3168; +} + static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index fdde1c342b35..2e1637edf1d3 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -507,7 +507,9 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb * | ECN | AccECN | 0 0 1 | Classic ECN | * +========+========+============+=============+ */ - if (tcp_ecn_mode_pending(tp)) + if (tcp_ca_no_fallback_rfc3168(sk)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); + else if (tcp_ecn_mode_pending(tp)) /* Downgrade from AccECN, or requested initially */ tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); break; @@ -531,9 +533,11 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb } } -static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, +static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_ecn_mode_pending(tp)) { if (!tcp_accecn_syn_requested(th)) { /* Downgrade to classic ECN feedback */ @@ -545,7 +549,8 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); } } - if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) + if (tcp_ecn_mode_rfc3168(tp) && + (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk))) tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e823a3b1ad3b..59dafcb45c16 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7090,7 +7090,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_wl1 = TCP_SKB_CB(skb)->seq; tp->max_window = tp->snd_wnd; - tcp_ecn_rcv_syn(tp, th, skb); + tcp_ecn_rcv_syn(sk, th, skb); tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bd5462154f97..9776c921d1bb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -485,9 +485,10 @@ static void tcp_ecn_openreq_child(struct sock *sk, tp->accecn_opt_demand = 1; tcp_ecn_received_counters_payload(sk, skb); } else { - tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ? - TCP_ECN_MODE_RFC3168 : - TCP_ECN_DISABLED); + if (inet_rsk(req)->ecn_ok && !tcp_ca_no_fallback_rfc3168(sk)) + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + else + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } } From c5ff6b83715919767f181f13e992b5055812a194 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:06 +0100 Subject: [PATCH 06/15] tcp: accecn: handle unexpected AccECN negotiation feedback According to Sections 3.1.2 and 3.1.3 of AccECN spec (RFC9768). In Section 3.1.2, it says an AccECN implementation has no need to recognize or support the Server response labelled 'Nonce' or ECN-nonce feedback more generally, as RFC 3540 has been reclassified as Historic. AccECN is compatible with alternative ECN feedback integrity approaches to the nonce. The SYN/ACK labelled 'Nonce' with (AE,CWR,ECE) = (1,0,1) is reserved for future use. A TCP Client (A) that receives such a SYN/ACK follows the procedure for forward compatibility given in Section 3.1.3. Then in Section 3.1.3, it says if a TCP Client has sent a SYN requesting AccECN feedback with (AE,CWR,ECE) = (1,1,1) then receives a SYN/ACK with the currently reserved combination (AE,CWR,ECE) = (1,0,1) but it does not have logic specific to such a combination, the Client MUST enable AccECN mode as if the SYN/ACK onfirmed that the Server supported AccECN and as if it fed back that the IP-ECN field on the SYN had arrived unchanged. Fixes: 3cae34274c79 ("tcp: accecn: AccECN negotiation"). Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-7-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/tcp_ecn.h | 44 ++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 2e1637edf1d3..a709fb1756eb 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -473,6 +473,26 @@ static inline u8 tcp_accecn_option_init(const struct sk_buff *skb, return TCP_ACCECN_OPT_COUNTER_SEEN; } +static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk, + const struct sk_buff *skb, u8 dsf) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); + tp->syn_ect_rcv = dsf & INET_ECN_MASK; + /* Demand Accurate ECN option in response to the SYN on the SYN/ACK + * and the TCP server will try to send one more packet with an AccECN + * Option at a later point during the connection. + */ + if (tp->rx_opt.accecn && + tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { + u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); + + tcp_accecn_saw_opt_fail_recv(tp, saw_opt); + tp->accecn_opt_demand = 2; + } +} + /* See Table 2 of the AccECN draft */ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb, const struct tcphdr *th, u8 ip_dsfield) @@ -495,13 +515,11 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); break; case 0x1: - case 0x5: /* +========+========+============+=============+ * | A | B | SYN/ACK | Feedback | * | | | B->A | Mode of A | * | | | AE CWR ECE | | * +========+========+============+=============+ - * | AccECN | Nonce | 1 0 1 | (Reserved) | * | AccECN | ECN | 0 0 1 | Classic ECN | * | Nonce | AccECN | 0 0 1 | Classic ECN | * | ECN | AccECN | 0 0 1 | Classic ECN | @@ -509,20 +527,20 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb */ if (tcp_ca_no_fallback_rfc3168(sk)) tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); - else if (tcp_ecn_mode_pending(tp)) - /* Downgrade from AccECN, or requested initially */ + else tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); break; - default: - tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); - tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; - if (tp->rx_opt.accecn && - tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { - u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); - - tcp_accecn_saw_opt_fail_recv(tp, saw_opt); - tp->accecn_opt_demand = 2; + case 0x5: + if (tcp_ecn_mode_pending(tp)) { + tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); + if (INET_ECN_is_ce(ip_dsfield)) { + tp->received_ce++; + tp->received_ce_pending++; + } } + break; + default: + tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); if (INET_ECN_is_ce(ip_dsfield) && tcp_accecn_validate_syn_feedback(sk, ace, tp->syn_ect_snt)) { From 3ae62b8b4a4848f745d5ed12f10056cb4e4c81f0 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:07 +0100 Subject: [PATCH 07/15] tcp: accecn: retransmit downgraded SYN in AccECN negotiation Based on AccECN spec (RFC9768) Section 3.1.4.1, if the sender of an AccECN SYN (the TCP Client) times out before receiving the SYN/ACK, it SHOULD attempt to negotiate the use of AccECN at least one more time by continuing to set all three TCP ECN flags (AE,CWR,ECE) = (1,1,1) on the first retransmitted SYN (using the usual retransmission time-outs). If this first retransmission also fails to be acknowledged, in deployment scenarios where AccECN path traversal might be problematic, the TCP Client SHOULD send subsequent retransmissions of the SYN with the three TCP-ECN flags cleared (AE,CWR,ECE) = (0,0,0). Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-8-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- net/ipv4/tcp_output.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 597e888af36d..b28596655d73 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3606,12 +3606,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) tcp_retrans_try_collapse(sk, skb, avail_wnd); } - /* RFC3168, section 6.1.1.1. ECN fallback - * As AccECN uses the same SYN flags (+ AE), this check covers both - * cases. - */ - if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN) - tcp_ecn_clear_syn(sk, skb); + if (!tcp_ecn_mode_pending(tp) || icsk->icsk_retransmits > 1) { + /* RFC3168, section 6.1.1.1. ECN fallback + * As AccECN uses the same SYN flags (+ AE), this check + * covers both cases. + */ + if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == + TCPHDR_SYN_ECN) + tcp_ecn_clear_syn(sk, skb); + } /* Update global and local TCP statistics. */ segs = tcp_skb_pcount(skb); From f1eaea5585e4d193e007d671f0916f4371c72c0b Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:08 +0100 Subject: [PATCH 08/15] tcp: add TCP_SYNACK_RETRANS synack_type Before this patch, retransmitted SYN/ACK did not have a specific synack_type; however, the upcoming patch needs to distinguish between retransmitted and non-retransmitted SYN/ACK for AccECN negotiation to transmit the fallback SYN/ACK during AccECN negotiation. Therefore, this patch introduces a new synack_type (TCP_SYNACK_RETRANS). Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-9-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/tcp.h | 1 + net/ipv4/tcp_output.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cace4fbe38d0..6c12be2cdd4d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -552,6 +552,7 @@ enum tcp_synack_type { TCP_SYNACK_NORMAL, TCP_SYNACK_FASTOPEN, TCP_SYNACK_COOKIE, + TCP_SYNACK_RETRANS, }; struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b28596655d73..a1596fe8dd9f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3929,6 +3929,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, switch (synack_type) { case TCP_SYNACK_NORMAL: + case TCP_SYNACK_RETRANS: skb_set_owner_edemux(skb, req_to_sk(req)); break; case TCP_SYNACK_COOKIE: @@ -4614,7 +4615,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) /* Paired with WRITE_ONCE() in sock_setsockopt() */ if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED) WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash()); - res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, + res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_RETRANS, NULL); if (!res) { TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); From f326f1f17f3772dae4b37360c639ed6b76fe6354 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:09 +0100 Subject: [PATCH 09/15] tcp: accecn: retransmit SYN/ACK without AccECN option or non-AccECN SYN/ACK For Accurate ECN, the first SYN/ACK sent by the TCP server shall set the ACE flag (Table 1 of RFC9768) and the AccECN option to complete the capability negotiation. However, if the TCP server needs to retransmit such a SYN/ACK (for example, because it did not receive an ACK acknowledging its SYN/ACK, or received a second SYN requesting AccECN support), the TCP server retransmits the SYN/ACK without the AccECN option. This is because the SYN/ACK may be lost due to congestion, or a middlebox may block the AccECN option. Furthermore, if this retransmission also times out, to expedite connection establishment, the TCP server should retransmit the SYN/ACK with (AE,CWR,ECE) = (0,0,0) and without the AccECN option, while maintaining AccECN feedback mode. This complies with Section 3.2.3.2.2 of the AccECN spec RFC9768. Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-10-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/tcp_ecn.h | 20 +++++++++++++++----- net/ipv4/tcp_output.c | 4 ++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index a709fb1756eb..796c613b5ef3 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -649,12 +649,22 @@ static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) } static inline void -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, + enum tcp_synack_type synack_type) { - if (tcp_rsk(req)->accecn_ok) - tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); - else if (inet_rsk(req)->ecn_ok) - th->ece = 1; + /* Accurate ECN shall retransmit SYN/ACK with ACE=0 if the + * previously retransmitted SYN/ACK also times out. + */ + if (!req->num_timeout || synack_type != TCP_SYNACK_RETRANS) { + if (tcp_rsk(req)->accecn_ok) + tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); + else if (inet_rsk(req)->ecn_ok) + th->ece = 1; + } else if (tcp_rsk(req)->accecn_ok) { + th->ae = 0; + th->cwr = 0; + th->ece = 0; + } } static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a1596fe8dd9f..22728daed436 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1106,7 +1106,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, if (treq->accecn_ok && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option) && - req->num_timeout < 1 && remaining >= TCPOLEN_ACCECN_BASE) { + synack_type != TCP_SYNACK_RETRANS && remaining >= TCPOLEN_ACCECN_BASE) { opts->use_synack_ecn_bytes = 1; remaining -= tcp_options_fit_accecn(opts, 0, remaining); } @@ -4012,7 +4012,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - tcp_ecn_make_synack(req, th); + tcp_ecn_make_synack(req, th, synack_type); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; skb->mark = ireq->ir_mark; From 4024081feb87fb11ac7bd36d8c397ccdd81ed302 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:10 +0100 Subject: [PATCH 10/15] tcp: accecn: unset ECT if receive or send ACE=0 in AccECN negotiaion Based on specification: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt Based on Section 3.1.5 of AccECN spec (RFC9768), a TCP Server in AccECN mode MUST NOT set ECT on any packet for the rest of the connection, if it has received or sent at least one valid SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake. In addition, a host in AccECN mode that is feeding back the IP-ECN field on a SYN or SYN/ACK MUST feed back the IP-ECN field on the latest valid SYN or acceptable SYN/ACK to arrive. Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-11-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- net/ipv4/inet_connection_sock.c | 3 +++ net/ipv4/tcp_input.c | 2 ++ net/ipv4/tcp_minisocks.c | 32 ++++++++++++++++++++++++-------- net/ipv4/tcp_output.c | 5 ++++- net/ipv4/tcp_timer.c | 3 +++ 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 13372d2cbed5..018e8ffc0717 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1103,6 +1104,8 @@ static void reqsk_timer_handler(struct timer_list *t) (!resend || !tcp_rtx_synack(sk_listener, req) || inet_rsk(req)->acked)) { + if (req->num_retrans > 1 && tcp_rsk(req)->accecn_ok) + tcp_rsk(req)->accecn_fail_mode |= TCP_ACCECN_ACE_FAIL_SEND; if (req->num_timeout++ == 0) atomic_dec(&queue->young); mod_timer(&req->rsk_timer, jiffies + tcp_reqsk_timeout(req)); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 59dafcb45c16..988d161e9918 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6469,6 +6469,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (th->syn) { if (tcp_ecn_mode_accecn(tp)) { accecn_reflector = true; + tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & + INET_ECN_MASK; if (tp->rx_opt.accecn && tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9776c921d1bb..ec128865f5c0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -481,6 +481,10 @@ static void tcp_ecn_openreq_child(struct sock *sk, tp->syn_ect_snt = treq->syn_ect_snt; tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt); tp->saw_accecn_opt = treq->saw_accecn_opt; + if (treq->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_SEND); + if (treq->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); tp->prev_ecnfield = treq->syn_ect_rcv; tp->accecn_opt_demand = 1; tcp_ecn_received_counters_payload(sk, skb); @@ -749,16 +753,28 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, - &tcp_rsk(req)->last_oow_ack_time) && + &tcp_rsk(req)->last_oow_ack_time)) { + if (tcp_rsk(req)->accecn_ok) { + u8 ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & + INET_ECN_MASK; - !tcp_rtx_synack(sk, req)) { - unsigned long expires = jiffies; + tcp_rsk(req)->syn_ect_rcv = ect_rcv; + if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) + tcp_rsk(req)->accecn_fail_mode |= TCP_ACCECN_ACE_FAIL_RECV; + } + if (!tcp_rtx_synack(sk, req)) { + unsigned long expires = jiffies; - expires += tcp_reqsk_timeout(req); - if (!fastopen) - mod_timer_pending(&req->rsk_timer, expires); - else - req->rsk_timer.expires = expires; + if (req->num_retrans > 1 && tcp_rsk(req)->accecn_ok) + tcp_rsk(req)->accecn_fail_mode |= TCP_ACCECN_ACE_FAIL_SEND; + + expires += tcp_reqsk_timeout(req); + if (!fastopen) + mod_timer_pending(&req->rsk_timer, + expires); + else + req->rsk_timer.expires = expires; + } } return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 22728daed436..2b356fdbf2ca 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -334,8 +334,11 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, return; if (tcp_ecn_mode_accecn(tp)) { - if (!tcp_accecn_ace_fail_recv(tp)) + if (!tcp_accecn_ace_fail_recv(tp) && + !tcp_accecn_ace_fail_send(tp)) INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); tcp_accecn_set_ace(tp, skb, th); skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN; } else { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 160080c9021d..5a14a53a3c9e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,6 +22,7 @@ #include #include #include +#include #include static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) @@ -479,6 +480,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) * it's not good to give up too easily. */ tcp_rtx_synack(sk, req); + if (req->num_retrans > 1 && tcp_rsk(req)->accecn_ok) + tcp_rsk(req)->accecn_fail_mode |= TCP_ACCECN_ACE_FAIL_SEND; req->num_timeout++; tcp_update_rto_stats(sk); if (!tp->retrans_stamp) From 2ed661248e2b920f53db61ddfc74bc68ed10c83d Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:11 +0100 Subject: [PATCH 11/15] tcp: accecn: fallback outgoing half link to non-AccECN According to Section 3.2.2.1 of AccECN spec (RFC9768), if the Server is in AccECN mode and in SYN-RCVD state, and if it receives a value of zero on a pure ACK with SYN=0 and no SACK blocks, for the rest of the connection the Server MUST NOT set ECT on outgoing packets and MUST NOT respond to AccECN feedback. Nonetheless, as a Data Receiver it MUST NOT disable AccECN feedback. Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-12-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/tcp_ecn.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 796c613b5ef3..49e0b865fe02 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -175,7 +175,9 @@ static inline void tcp_accecn_third_ack(struct sock *sk, switch (ace) { case 0x0: /* Invalid value */ - tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); + if (!TCP_SKB_CB(skb)->sacked) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV | + TCP_ACCECN_OPT_FAIL_RECV); break; case 0x7: case 0x5: From 1247fb19cafee6f9fa350ae378e4e1e9965cc253 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:12 +0100 Subject: [PATCH 12/15] tcp: accecn: detect loss ACK w/ AccECN option and add TCP_ACCECN_OPTION_PERSIST Detect spurious retransmission of a previously sent ACK carrying the AccECN option after the second retransmission. Since this might be caused by the middlebox dropping ACK with options it does not recognize, disable the sending of the AccECN option in all subsequent ACKs. This patch follows Section 3.2.3.2.2 of AccECN spec (RFC9768), and a new field (accecn_opt_sent_w_dsack) is added to indicate that an AccECN option was sent with duplicate SACK info. Also, a new AccECN option sending mode is added to tcp_ecn_option sysctl: (TCP_ECN_OPTION_PERSIST), which ignores the AccECN fallback policy and persistently sends AccECN option once it fits into TCP option space. Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-13-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 4 +++- include/linux/tcp.h | 3 ++- include/net/tcp_ecn.h | 2 ++ net/ipv4/sysctl_net_ipv4.c | 2 +- net/ipv4/tcp_input.c | 13 ++++++++++++- net/ipv4/tcp_output.c | 7 ++++++- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bc9a01606daf..28c7e4f5ecf9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -482,7 +482,9 @@ tcp_ecn_option - INTEGER 1 Send AccECN option sparingly according to the minimum option rules outlined in draft-ietf-tcpm-accurate-ecn. 2 Send AccECN option on every packet whenever it fits into TCP - option space. + option space except when AccECN fallback is triggered. + 3 Send AccECN option on every packet whenever it fits into TCP + option space even when AccECN fallback is triggered. = ============================================================ Default: 2 diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fbc514d582e7..f72eef31fa23 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -291,7 +291,8 @@ struct tcp_sock { u8 nonagle : 4,/* Disable Nagle algorithm? */ rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ - unused2:4; + accecn_opt_sent_w_dsack:1,/* Sent ACCECN opt in previous ACK w/ D-SACK */ + unused2:3; u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ est_ecnfield:2,/* ECN field for AccECN delivered estimates */ accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */ diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 49e0b865fe02..e01653bbf181 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -29,6 +29,7 @@ enum tcp_accecn_option { TCP_ACCECN_OPTION_DISABLED = 0, TCP_ACCECN_OPTION_MINIMUM = 1, TCP_ACCECN_OPTION_FULL = 2, + TCP_ACCECN_OPTION_PERSIST = 3, }; /* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ @@ -406,6 +407,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp) tp->received_ce_pending = 0; __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); + tp->accecn_opt_sent_w_dsack = 0; tp->accecn_minlen = 0; tp->accecn_opt_demand = 0; tp->est_ecnfield = 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a1a50a5c80dc..385b5b986d23 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -749,7 +749,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_TWO, + .extra2 = SYSCTL_THREE, }, { .procname = "tcp_ecn_option_beacon", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 988d161e9918..89526f0f2301 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5046,8 +5046,11 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } -static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) +static void tcp_rcv_spurious_retrans(struct sock *sk, + const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + /* When the ACK path fails or drops most ACKs, the sender would * timeout and spuriously retransmit the same segment repeatedly. * If it seems our ACKs are not reaching the other side, @@ -5067,6 +5070,14 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) /* Save last flowlabel after a spurious retrans. */ tcp_save_lrcv_flowlabel(sk, skb); #endif + /* Check DSACK info to detect that the previous ACK carrying the + * AccECN option was lost after the second retransmision, and then + * stop sending AccECN option in all subsequent ACKs. + */ + if (tcp_ecn_mode_accecn(tp) && + tp->accecn_opt_sent_w_dsack && + TCP_SKB_CB(skb)->seq == tp->duplicate_sack[0].start_seq) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_SEND); } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2b356fdbf2ca..f44d60d13b9f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -715,9 +715,12 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, if (tp) { tp->accecn_minlen = 0; tp->accecn_opt_tstamp = tp->tcp_mstamp; + tp->accecn_opt_sent_w_dsack = tp->rx_opt.dsack; if (tp->accecn_opt_demand) tp->accecn_opt_demand--; } + } else if (tp) { + tp->accecn_opt_sent_w_dsack = 0; } if (unlikely(OPTION_SACK_ADVERTISE & options)) { @@ -1189,7 +1192,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (tcp_ecn_mode_accecn(tp)) { int ecn_opt = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option); - if (ecn_opt && tp->saw_accecn_opt && !tcp_accecn_opt_fail_send(tp) && + if (ecn_opt && tp->saw_accecn_opt && + (ecn_opt >= TCP_ACCECN_OPTION_PERSIST || + !tcp_accecn_opt_fail_send(tp)) && (ecn_opt >= TCP_ACCECN_OPTION_FULL || tp->accecn_opt_demand || tcp_accecn_option_beacon_check(sk))) { opts->use_synack_ecn_bytes = 0; From 4fa4ac5e584841c0f9b01c2f7dd0c2e3caa8bca0 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:13 +0100 Subject: [PATCH 13/15] tcp: accecn: add tcpi_ecn_mode and tcpi_option2 in tcp_info Add 2-bit tcpi_ecn_mode feild within tcp_info to indicate which ECN mode is negotiated: ECN_MODE_DISABLED, ECN_MODE_RFC3168, ECN_MODE_ACCECN, or ECN_MODE_PENDING. This is done by utilizing available bits from tcpi_accecn_opt_seen (reduced from 16 bits to 2 bits) and tcpi_accecn_fail_mode (reduced from 16 bits to 4 bits). Also, an extra 24-bit tcpi_options2 field is identified to represent newer options and connection features, as all 8 bits of tcpi_options field have been used. Signed-off-by: Chia-Yu Chang Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-14-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/tcp_ecn.h | 11 ----------- include/uapi/linux/tcp.h | 26 +++++++++++++++++++++++--- net/ipv4/tcp.c | 8 ++++++++ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index e01653bbf181..e9a933641636 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -67,12 +67,6 @@ static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } -/* tp->accecn_fail_mode */ -#define TCP_ACCECN_ACE_FAIL_SEND BIT(0) -#define TCP_ACCECN_ACE_FAIL_RECV BIT(1) -#define TCP_ACCECN_OPT_FAIL_SEND BIT(2) -#define TCP_ACCECN_OPT_FAIL_RECV BIT(3) - static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp) { return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND; @@ -98,11 +92,6 @@ static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode) tp->accecn_fail_mode |= mode; } -#define TCP_ACCECN_OPT_NOT_SEEN 0x0 -#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1 -#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2 -#define TCP_ACCECN_OPT_FAIL_SEEN 0x3 - static inline u8 tcp_accecn_ace(const struct tcphdr *th) { return (th->ae << 2) | (th->cwr << 1) | th->ece; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index dce3113787a7..03772dd4d399 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -226,6 +226,24 @@ enum tcp_ca_state { #define TCPF_CA_Loss (1<rto_stamp) info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp; + if (tcp_ecn_disabled(tp)) + info->tcpi_ecn_mode = TCPI_ECN_MODE_DISABLED; + else if (tcp_ecn_mode_rfc3168(tp)) + info->tcpi_ecn_mode = TCPI_ECN_MODE_RFC3168; + else if (tcp_ecn_mode_accecn(tp)) + info->tcpi_ecn_mode = TCPI_ECN_MODE_ACCECN; + else if (tcp_ecn_mode_pending(tp)) + info->tcpi_ecn_mode = TCPI_ECN_MODE_PENDING; info->tcpi_accecn_fail_mode = tp->accecn_fail_mode; info->tcpi_accecn_opt_seen = tp->saw_accecn_opt; info->tcpi_received_ce = tp->received_ce; From 8ae3e8e6ceedfb3cf74ca18169c942e073586a39 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:14 +0100 Subject: [PATCH 14/15] tcp: accecn: enable AccECN Enable Accurate ECN negotiation and request for incoming and outgoing connection by setting sysctl_tcp_ecn: +==============+===========================================+ | | Highest ECN variant (Accurate ECN, ECN, | | tcp_ecn | or no ECN) to be negotiated & requested | | +---------------------+---------------------+ | | Incoming connection | Outgoing connection | +==============+=====================+=====================+ | 0 | No ECN | No ECN | | 1 | ECN | ECN | | 2 | ECN | No ECN | +--------------+---------------------+---------------------+ | 3 | Accurate ECN | Accurate ECN | | 4 | Accurate ECN | ECN | | 5 | Accurate ECN | No ECN | +==============+=====================+=====================+ Refer Documentation/networking/ip-sysctl.rst for more details. Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-15-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- net/ipv4/sysctl_net_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 385b5b986d23..643763bc2142 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -47,7 +47,7 @@ static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX; static int tcp_plb_max_rounds = 31; static int tcp_plb_max_cong_thresh = 256; static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC; -static int tcp_ecn_mode_max = 2; +static int tcp_ecn_mode_max = 5; static u32 icmp_errors_extension_mask_all = GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0); From f85d9c45f1d48a146f37cfd3d244aac4157ea390 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:15 +0100 Subject: [PATCH 15/15] selftests/net: packetdrill: add TCP Accurate ECN cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux Accurate ECN test sets using ACE counters and AccECN options to cover several scenarios: Connection teardown, different ACK conditions, counter wrapping, SACK space grabbing, fallback schemes, negotiation retransmission/reorder/loss, AccECN option drop/loss, different handshake reflectors, data with marking, and different sysctl values. The packetdrill used is commit cbe405666c9c8698ac1e72f5e8ffc551216dfa56 of repo: https://github.com/minuscat/packetdrill/tree/upstream_accecn. And corresponding patches are sent to google/packetdrill email list. Signed-off-by: Chia-Yu Chang Co-developed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-16-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- .../tcp_accecn_2nd_data_as_first.pkt | 24 +++++++ .../tcp_accecn_2nd_data_as_first_connect.pkt | 30 ++++++++ .../tcp_accecn_3rd_ack_after_synack_rxmt.pkt | 19 +++++ ..._accecn_3rd_ack_ce_updates_received_ce.pkt | 18 +++++ .../tcp_accecn_3rd_ack_lost_data_ce.pkt | 22 ++++++ .../net/packetdrill/tcp_accecn_3rd_dups.pkt | 26 +++++++ .../tcp_accecn_acc_ecn_disabled.pkt | 13 ++++ .../tcp_accecn_accecn_then_notecn_syn.pkt | 28 ++++++++ .../tcp_accecn_accecn_to_rfc3168.pkt | 18 +++++ .../tcp_accecn_client_accecn_options_drop.pkt | 34 +++++++++ .../tcp_accecn_client_accecn_options_lost.pkt | 38 ++++++++++ .../tcp_accecn_clientside_disabled.pkt | 12 ++++ ...cecn_close_local_close_then_remote_fin.pkt | 25 +++++++ .../tcp_accecn_delivered_2ndlargeack.pkt | 25 +++++++ ..._accecn_delivered_falseoverflow_detect.pkt | 31 ++++++++ .../tcp_accecn_delivered_largeack.pkt | 24 +++++++ .../tcp_accecn_delivered_largeack2.pkt | 25 +++++++ .../tcp_accecn_delivered_maxack.pkt | 25 +++++++ .../tcp_accecn_delivered_updates.pkt | 70 +++++++++++++++++++ .../net/packetdrill/tcp_accecn_ecn3.pkt | 12 ++++ .../tcp_accecn_ecn_field_updates_opt.pkt | 35 ++++++++++ .../packetdrill/tcp_accecn_ipflags_drop.pkt | 14 ++++ .../tcp_accecn_listen_opt_drop.pkt | 16 +++++ .../tcp_accecn_multiple_syn_ack_drop.pkt | 28 ++++++++ .../tcp_accecn_multiple_syn_drop.pkt | 18 +++++ .../tcp_accecn_negotiation_bleach.pkt | 23 ++++++ .../tcp_accecn_negotiation_connect.pkt | 23 ++++++ .../tcp_accecn_negotiation_listen.pkt | 26 +++++++ .../tcp_accecn_negotiation_noopt_connect.pkt | 23 ++++++ .../tcp_accecn_negotiation_optenable.pkt | 23 ++++++ .../tcp_accecn_no_ecn_after_accecn.pkt | 20 ++++++ .../net/packetdrill/tcp_accecn_noopt.pkt | 27 +++++++ .../net/packetdrill/tcp_accecn_noprogress.pkt | 27 +++++++ .../tcp_accecn_notecn_then_accecn_syn.pkt | 28 ++++++++ .../tcp_accecn_rfc3168_to_fallback.pkt | 18 +++++ .../tcp_accecn_rfc3168_to_rfc3168.pkt | 18 +++++ .../tcp_accecn_sack_space_grab.pkt | 28 ++++++++ .../tcp_accecn_sack_space_grab_with_ts.pkt | 39 +++++++++++ ...tcp_accecn_serverside_accecn_disabled1.pkt | 20 ++++++ ...tcp_accecn_serverside_accecn_disabled2.pkt | 20 ++++++ .../tcp_accecn_serverside_broken.pkt | 19 +++++ .../tcp_accecn_serverside_ecn_disabled.pkt | 19 +++++ .../tcp_accecn_serverside_only.pkt | 18 +++++ ...n_syn_ace_flags_acked_after_retransmit.pkt | 18 +++++ .../tcp_accecn_syn_ace_flags_drop.pkt | 16 +++++ ...n_ack_ace_flags_acked_after_retransmit.pkt | 27 +++++++ .../tcp_accecn_syn_ack_ace_flags_drop.pkt | 26 +++++++ .../net/packetdrill/tcp_accecn_syn_ce.pkt | 13 ++++ .../net/packetdrill/tcp_accecn_syn_ect0.pkt | 13 ++++ .../net/packetdrill/tcp_accecn_syn_ect1.pkt | 13 ++++ .../net/packetdrill/tcp_accecn_synack_ce.pkt | 27 +++++++ ..._accecn_synack_ce_updates_delivered_ce.pkt | 22 ++++++ .../packetdrill/tcp_accecn_synack_ect0.pkt | 24 +++++++ .../packetdrill/tcp_accecn_synack_ect1.pkt | 24 +++++++ .../packetdrill/tcp_accecn_synack_rexmit.pkt | 15 ++++ .../packetdrill/tcp_accecn_synack_rxmt.pkt | 25 +++++++ .../packetdrill/tcp_accecn_tsnoprogress.pkt | 26 +++++++ .../net/packetdrill/tcp_accecn_tsprogress.pkt | 25 +++++++ 58 files changed, 1363 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt new file mode 100644 index 000000000000..07e9936e70e6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt @@ -0,0 +1,24 @@ +// 3rd ACK + 1st data segment lost, data segments with ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 ++.002 > SW. 0:0(0) ack 1 +// 3rd ACK lost +// 1st data segment lost ++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 ++.002 > [ect0] WA. 1:1(0) ack 1 ++.002 accept(3, ..., ...) = 4 + ++0.2 < [ce] EAP. 1:1001(1000) ack 1 win 264 ++.001 > [ect0] EWA. 1:1(0) ack 2001 + ++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 264 ++.001 > [ect0] . 1:1(0) ack 3001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt new file mode 100644 index 000000000000..76b8422b34dc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt @@ -0,0 +1,30 @@ +// 3rd ACK + 1st data segment lost, 2nd data segments with ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 +// 3rd ACK lost ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 2000) = 2000 +// 1st data segment lost + 2nd gets CE ++.002 > [ect0] .5 1:1005(1004) ack 1 ++.000 > [ect0] P.5 1005:2001(996) ack 1 ++0.05 < [ect0] .6 1:1(0) ack 1 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% + ++0.002~+0.1 > [ect0] .5 1:1005(1004) ack 1 ++.05 < [ect0] .6 1:1(0) ack 2001 win 264 + ++0.01 write(4, ..., 1000) = 1000 ++0~+0.002 > [ect0] P.5 2001:3001(1000) ack 1 + ++0.1 < [ect0] .5 1:1001(1000) ack 3001 win 264 ++0~+0.01 > [ect0] .5 3001:3001(0) ack 1001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt new file mode 100644 index 000000000000..84060e490589 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt @@ -0,0 +1,19 @@ +// Test 3rd ACK flags when SYN-ACK is rexmitted + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.1 < [ect0] S. 0:0(0) ack 1 win 32767 + +// Our code currently sends a challenge ACK +// when it receives a SYN in ESTABLISHED state +// based on the latest SYN ++.002 > [ect0] A. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt new file mode 100644 index 000000000000..d3fe09d0606f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt @@ -0,0 +1,18 @@ +// Third ACK CE increases r.cep + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ce] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] WAP. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt new file mode 100644 index 000000000000..d28722db42b1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt @@ -0,0 +1,22 @@ +// 3rd ACK lost, CE for the first data segment + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 ++.002 > SW. 0:0(0) ack 1 +// 3rd ACK lost ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 264 ++.002 > [ect0] WA. 1:1(0) ack 1001 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 ++.001 > [ect0] EWA. 1:1(0) ack 2001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt new file mode 100644 index 000000000000..a4d808116e34 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt @@ -0,0 +1,26 @@ +// Test SYN/ACK rexmit triggered 3rd ACK duplicate + CE on first data seg + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 + +// SYN/ACK rexmitted => two 3rd ACKs in-flight ++1.0~+1.1 > SW. 0:0(0) ack 1 +// Delivered 1st 3rd ACK ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 ++.002 accept(3, ..., ...) = 4 + +// Duplicate 3rd ACK delivered ++1.05 < [ect0] W. 1:1(0) ack 1 win 257 + ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 ++.002 > [ect0] WA. 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt new file mode 100644 index 000000000000..410a303c6d49 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt @@ -0,0 +1,13 @@ +// Test that when accurate ECN is disabled, +// client uses RFC3168 ECN for SYN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEW 0:0(0) ++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 ++.002 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt new file mode 100644 index 000000000000..10728114b11b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [ect0] SEWA 0:0(0) win 32792 ++.002 > [noecn] SA. 0:0(0) ack 1 + +// Retransmit SYN ++0.1 < [noecn] S 0:0(0) win 32792 ++.002 > [noecn] SW. 0:0(0) ack 1 + ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + +// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0 ++0.01 write(4, ..., 100) = 100 ++.002 > [noecn] P5. 1:101(100) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt new file mode 100644 index 000000000000..04d928f0d44d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt @@ -0,0 +1,18 @@ +// Test AccECN -> RFC3168 fallback when sysctl asks for RFC3168 ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SE. 0:0(0) ack 1 ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt new file mode 100644 index 000000000000..788af6bea69c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt @@ -0,0 +1,34 @@ +// Client negotiates AccECN and starts sending +// AccECN option in last ACK and data segments +// Middlebox drops AccECN option and client +// reverts to ACE flags only + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +sysctl -q net.ipv4.tcp_ecn_option_beacon=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 ++.002 accept(3, ..., ...) = 4 + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 ++.002 > [ect0] EA. 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 ++.002 > [ect0] EA. 1:1(0) ack 1001 + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 ++.002 > [ect0] EA. 1:1(0) ack 1001 + ++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 ++.002 > [ect0] EA. 1:1(0) ack 2001 + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt new file mode 100644 index 000000000000..f5839c2e682d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt @@ -0,0 +1,38 @@ +// Client negotiates AccECN and starts sending +// AccECN option in last ACK and data segments +// Middlebox accepts AccECN option but some packets +// are lost due to congestion. Client should +// continue to send AccECN option + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.102 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.1 < [ect0] SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] A. 1:1(0) ack 1 + +// Send ++0.01 write(4, ..., 3000) = 3000 ++.002 > [ect0] .5 1:1013(1012) ack 1 ++.002 > [ect0] P.5 1013:2025(1012) ack 1 ++.002 > [ect0] P.5 2025:3001(976) ack 1 + +// First two segments were lost due to congestion as SACK was +// received acknowledging 3rd segment ++0.1 < [ect0] .5 1:1(0) ack 1 win 264 + +// Since data with option was SACKed, we can +// continue to use AccECN option for the rest of +// the connection. This one is a rexmt ++.02~+0.5 > [ect0] .5 1:1013(1012) ack 1 ++0.1 < [ect0] .5 1:1(0) ack 3001 win 264 + +// Send new data, it should contain AccECN option ++0.01 write(4, ..., 2000) = 2000 ++.002 > [ect0] .5 3001:4013(1012) ack 1 ++.002 > [ect0] P.5 4013:5001(988) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt new file mode 100644 index 000000000000..c00b36d6a833 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt @@ -0,0 +1,12 @@ +// AccECN sysctl server-side only, no ECN/AccECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=5 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > S 0:0(0) ++0.05 < S. 0:0(0) ack 1 win 32767 ++.002 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt new file mode 100644 index 000000000000..f9c27f39f354 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt @@ -0,0 +1,25 @@ +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > [noecn] SEWA 0:0(0) <...> + +0 < [ect1] SW. 0:0(0) ack 1 win 32768 + +0 > [ect0] EW. 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > [ect0] P5. 1:1001(1000) ack 1 + +0 < [ect0] .5 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > [ect0] F5. 1001:1001(0) ack 1 + +0 < [ect0] .5 1:1(0) ack 1002 win 257 + + +0 < [ect0] F5. 1:1(0) ack 1002 win 257 + +0 > [ect0] . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt new file mode 100644 index 000000000000..6d771234124a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 ++0.05 < [ect0] .5 1:1(0) ack 1461 win 264 ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 8, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt new file mode 100644 index 000000000000..76384f52b021 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt @@ -0,0 +1,31 @@ +// Test false overflow detection with option used to rule out overflow + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + +// Stop sending option to allow easier testing ++0 `sysctl -q net.ipv4.tcp_ecn_option=0` + ++0.002 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + ++0.05 < [ect0] .5 1:1(0) ack 1460 win 264 ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_e0_bytes == 14600, tcpi_delivered_e0_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt new file mode 100644 index 000000000000..8bce5dce35a2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt @@ -0,0 +1,24 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt new file mode 100644 index 000000000000..5f2b147214f4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + // Fake CE ++0.05 < [ect0] .6 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt new file mode 100644 index 000000000000..fd07bdc14f37 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (at ACE field max delta) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + // Fake CE ++0.05 < [ect0] .4 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 7, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt new file mode 100644 index 000000000000..cb1e70ff2d26 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt @@ -0,0 +1,70 @@ +// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 + // Fake CE ++0.05 < [ect0] WA. 1:1(0) ack 1001 win 264 + ++0.01 %{ +assert tcpi_delivered_ce == 1, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 1000, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 + // Fake ect0 ++0.05 < [ect0] WA. 1:1(0) ack 2001 win 264 + ++0.01 %{ +assert tcpi_delivered_ce == 1, tcpi_delivered_ce +assert tcpi_delivered_e0_bytes == 1000, tcpi_delivered_e0_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 2001:3001(1000) ack 1 + // Fake ce ++0.05 < [ect0] EWA. 1:1(0) ack 3001 win 264 + ++0.01 %{ +assert tcpi_delivered_ce == 2, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 2000, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 3001:4001(1000) ack 1 + // Fake ect1 ++0.05 < [ect0] EWA. 1:1(0) ack 4001 win 264 + ++0.01 %{ +assert tcpi_delivered_ce == 2, tcpi_delivered_ce +assert tcpi_delivered_e1_bytes == 1000, tcpi_delivered_e1_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 4001:5001(1000) ack 1 + // Fake ce ++0.05 < [ect0] . 1:1(0) ack 5001 win 264 + ++0.01 %{ +assert tcpi_delivered_ce == 3, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 3000, tcpi_delivered_ce_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt new file mode 100644 index 000000000000..6627c7bb2d26 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt @@ -0,0 +1,12 @@ +// Test that tcp_ecn=4 uses RFC3168 ECN for SYN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=4 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.05 connect(4, ..., ...) = 0 + ++.002 > SEW 0:0(0) ++0.05 < S. 0:0(0) ack 1 win 32767 ++.002 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt new file mode 100644 index 000000000000..51879477bb50 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt @@ -0,0 +1,35 @@ +// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 ++.002 accept(3, ..., ...) = 4 + ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 ++.002 > [ect0] WA. 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 ++.002 > [ect0] WA. 1:1(0) ack 2001 + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 257 ++.002 > [ect0] EWA. 1:1(0) ack 3001 + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect1] EAP. 3001:4001(1000) ack 1 win 257 ++.002 > [ect0] EWA. 1:1(0) ack 4001 + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ce] EAP. 4001:5001(1000) ack 1 win 257 ++.002 > [ect0] . 1:1(0) ack 5001 + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt new file mode 100644 index 000000000000..0c72fa4a1251 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt @@ -0,0 +1,14 @@ +// Test IP flags drop +--tolerance_usecs=50000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 1.1 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) ++.02 ~ +1.1 > SEWA 0:0(0) ++0.05 < S. 0:0(0) ack 1 win 32767 ++.002 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt new file mode 100644 index 000000000000..171f9433e55f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt @@ -0,0 +1,16 @@ +// SYN/ACK option drop test + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++.02 ~+2 > SW. 0:0(0) ack 1 ++.02 ~+5 > S. 0:0(0) ack 1 ++.02 ~+8 > S. 0:0(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt new file mode 100644 index 000000000000..0f65cf56cd2b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 ++.002 > [noecn] SW. 0:0(0) ack 1 + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 ++4~+4.4 > [noecn] S. 0:0(0) ack 1 + +// Received an ACK after sending 3rd retransmission, not a blackhole ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt new file mode 100644 index 000000000000..343181633980 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt @@ -0,0 +1,18 @@ +// Test that SYN with ACE flags and without +// ACE flags got dropped. Although we disable +// ECN, we shouldn't consider this as blackholed +// as these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 3.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++.02~+1.1 > [noecn] SEWA 0:0(0) ++.02~+1.1 > [noecn] S 0:0(0) ++.02~+1.1 > [noecn] S 0:0(0) ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 ++0~+0.01 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt new file mode 100644 index 000000000000..37dabc4603c8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt @@ -0,0 +1,23 @@ +// Test AccECN flags bleach + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [noecn] EAP. 1:1001(1000) ack 1 ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt new file mode 100644 index 000000000000..5b14892fda51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) ++0.05 < SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt new file mode 100644 index 000000000000..25f7cb2feb25 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt @@ -0,0 +1,26 @@ +// Test basic AccECN negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt new file mode 100644 index 000000000000..50e08c492a69 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation without option + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) ++0.05 < SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt new file mode 100644 index 000000000000..2904f1ba9975 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation, late option enable + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) ++0.05 < SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt new file mode 100644 index 000000000000..64e0fc1c1f14 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt @@ -0,0 +1,20 @@ +// Test client behavior on receiving a non ECN SYN-ACK +// after receiving an AccECN SYN-ACK and moving to +// ESTABLISHED state + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) +// Receive an AccECN SYN-ACK and move to ESTABLISHED ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 + +// Receive a non ECN SYN-ACK and send a challenge ACK with ACE feedback ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt new file mode 100644 index 000000000000..f407c629a3f7 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt @@ -0,0 +1,27 @@ +// Test basic AccECN negotiation with option off using sysctl + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt new file mode 100644 index 000000000000..32454e7187f9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt @@ -0,0 +1,27 @@ +// Test no progress filtering + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt new file mode 100644 index 000000000000..6597d5f2d778 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] S 0:0(0) win 32792 ++.002 > [noecn] S. 0:0(0) ack 1 + +// Retransmit SYN ++0.1 < [ect0] SEWA 0:0(0) win 32792 ++.002 > [noecn] S. 0:0(0) ack 1 + ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + +// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0 ++0.01 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt new file mode 100644 index 000000000000..0f97dfcfa82d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt @@ -0,0 +1,18 @@ +// Test RFC3168 fallback when sysctl asks for AccECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEW 0:0(0) win 32792 ++.002 > SE. 0:0(0) ack 1 ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt new file mode 100644 index 000000000000..9baffdd66fe5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt @@ -0,0 +1,18 @@ +// Test RFC3168 ECN when sysctl asks for RFC3168 ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEW 0:0(0) win 32792 ++.002 > SE. 0:0(0) ack 1 ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt new file mode 100644 index 000000000000..3fc56f9c6a6f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt @@ -0,0 +1,28 @@ +// Test SACK space grab to fit AccECN option +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 ++.01 < [ect0] EAP. 3001:4001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 ++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 +// DSACK works? ++.01 < [ect0] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 ++.01 < [ect1] EAP. 6001:7001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt new file mode 100644 index 000000000000..1c075b5d81ae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt @@ -0,0 +1,39 @@ +// Test SACK space grab to fit AccECN option +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + +// One SACK block should allow all 3 AccECN fields: ++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 + +// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check ect1 arriving. ++.01 < [ect1] EAP. 3001:4001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 + +// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check CE arriving. ++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 + +// Check that DSACK works, using 2 SACK blocks in total, if we only need to use 2 AccECN fields: check ect1 arriving. ++.01 < [ect1] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 + +// Check the case where the AccECN option doesn't fit, because sending ect0 +// with order 1 would rquire 3 AccECN fields, +// and TS (12 bytes) + 2 SACK blocks (20 bytes) + 3 AccECN fields (2 + 3*3 bytes) > 40 bytes. +// That's OK; Linux TCP AccECN is optimized for the ECT1 case, not ECT0. ++.01 < [ect0] EAP. 6001:7001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt new file mode 100644 index 000000000000..6b88ab78bfce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt @@ -0,0 +1,20 @@ +// Test against classic ECN server +// Not-ECT on SYN and server sets 1|0|1 (AE is unused for classic ECN) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.05 < [noecn] SEA. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 + ++0 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 ++0 close(4) = 0 + ++.002 > [ect0] F.5 101:101(0) ack 1 ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt new file mode 100644 index 000000000000..d24ada008ece --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt @@ -0,0 +1,20 @@ +// Test against classic ECN server +// Not-ECT on SYN and server sets 0|0|1 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.05 < [noecn] SE. 0:0(0) ack 1 win 32767 ++.002 > [noecn] . 1:1(0) ack 1 + ++0 write(4, ..., 100) = 100 ++.002 > [ect0] P. 1:101(100) ack 1 ++0 close(4) = 0 + ++0 > [noecn] F. 101:101(0) ack 1 <...> ++0.1 < R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt new file mode 100644 index 000000000000..a20d7e890ee1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt @@ -0,0 +1,19 @@ +// Test against broken server (1|1|1) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.05 < [noecn] SEWA. 0:0(0) ack 1 win 32767 ++.002 > [noecn] . 1:1(0) ack 1 + ++0 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 ++0 close(4) = 0 + ++.002 > [noecn] F. 101:101(0) ack 1 <...> ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt new file mode 100644 index 000000000000..428255bedab7 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt @@ -0,0 +1,19 @@ +// Test against Non ECN server (0|0|0) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 ++.002 > [noecn] . 1:1(0) ack 1 + ++0 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 ++0 close(4) = 0 + ++.002 > [noecn] F. 101:101(0) ack 1 ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt new file mode 100644 index 000000000000..e9a5a0d3677c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt @@ -0,0 +1,18 @@ +// Test AccECN with sysctl set to server-side only + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=5 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt new file mode 100644 index 000000000000..412fa903105c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt @@ -0,0 +1,18 @@ +// Test that SYN with ACE flags was Acked +// after 2nd retransmission. In this case, +// since we got SYN-ACK that supports Accurate +// ECN, we consider this as successful negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 2.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++1~+1.1 > [noecn] SEWA 0:0(0) ++1~+1.1 > [noecn] S 0:0(0) + ++0.1 < [noecn] SW. 0:0(0) ack 1 win 32767 ++0~+0.01 > [ect0] W. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt new file mode 100644 index 000000000000..4622754a2270 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt @@ -0,0 +1,16 @@ +// Test that SYN with ACE flags got dropped +// We retry one more time with ACE and then +// fallback to disabled ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 2.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++1~+1.1 > [noecn] SEWA 0:0(0) ++1~+1.1 > [noecn] S 0:0(0) ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 ++0~+0.01 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt new file mode 100644 index 000000000000..ee15f108cafe --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt @@ -0,0 +1,27 @@ +// Test that SYN-ACK with ACE flags was Acked +// after 2nd retransmission. In this case, +// since we got the last ACK that supports Accurate +// ECN, we consider this as successful negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 ++.002 > [noecn] SW. 0:0(0) ack 1 + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 + +// Received an ACK with ACE flags, state should be set to negotiation succeeded ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt new file mode 100644 index 000000000000..ccfe353a8ee4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt @@ -0,0 +1,26 @@ +// Test that SYN-ACK with ACE flags got dropped +// We retry one more time with ACE and then +// fallback to disabled ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 ++.002 > [noecn] SW. 0:0(0) ack 1 + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 + +// Received an ACK with no ACE flags, state should be set to blackholed ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++0 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt new file mode 100644 index 000000000000..dc83f7a18180 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ce] SEWA 0:0(0) win 32792 ++.002 > SWA. 0:0(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt new file mode 100644 index 000000000000..e63a8d018c37 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ect0] SEWA 0:0(0) win 32792 ++.002 > SA. 0:0(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt new file mode 100644 index 000000000000..23c0e43b3dbe --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ect1] SEWA 0:0(0) win 32792 ++.002 > SEW. 0:0(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt new file mode 100644 index 000000000000..c3497738f680 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt @@ -0,0 +1,27 @@ +// Test SYNACK CE & received_ce update + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) ++0.05 < [ce] SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] WA. 1:1(0) ack 1 + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.6 1:101(100) ack 1 ++0.05 < [ect0] P.5 1:101(100) ack 101 win 256 ++.002 > [ect0] .6 101:101(0) ack 101 + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.6 101:201(100) ack 101 + ++0.1 < [ect1] P.5 201:301(100) ack 201 win 256 ++.002 > [ect0] .6 201:201(0) ack 101 + ++0.01 < [ce] .6 401:501(100) ack 201 win 256 ++.002 > [ect0] .7 201:201(0) ack 101 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt new file mode 100644 index 000000000000..5fd77f466572 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt @@ -0,0 +1,22 @@ +// Reflected SYNACK CE mark increases delivered_ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_fallback=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 ++.002 > SW. 0:0(0) ack 1 +// Fake ce for prev, ECT validator must be disabled for this to work ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt new file mode 100644 index 000000000000..f6ad1ea5c0c4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt @@ -0,0 +1,24 @@ +// Test SYN=0 reflector + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) ++0.05 < [ect0] SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] A. 1:1(0) ack 1 + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 ++0.05 < [ect0] P.5 1:1(0) ack 101 win 256 + ++0.01 < [ect0] P.5 1:101(100) ack 101 win 256 ++.002 > [ect0] .5 101:101(0) ack 101 ++0 read(4, ..., 100) = 100 + ++0 close(4) = 0 ++0 > F.5 101:101(0) ack 101 <...> ++0.1 < R. 101:101(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt new file mode 100644 index 000000000000..7ecfc5fb9dbb --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt @@ -0,0 +1,24 @@ +// Test SYN=0 reflector + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) ++0.05 < [ect1] SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] EW. 1:1(0) ack 1 + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 ++0.05 < [ect1] P.5 1:1(0) ack 101 win 256 + ++0.01 < [ect1] P.5 1:101(100) ack 101 win 256 ++.002 > [ect0] .5 101:101(0) ack 101 ++0 read(4, ..., 100) = 100 + ++0 close(4) = 0 ++0 > F5. 101:101(0) ack 101 <...> ++0.1 < R. 101:101(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt new file mode 100644 index 000000000000..9e0959782ef5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt @@ -0,0 +1,15 @@ +// Test 3rd ACK flags when SYN-ACK is rexmitted + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) ++0.05 < SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.05 < SW. 0:0(0) ack 1 win 32767 ++.002 > [ect0] W. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt new file mode 100644 index 000000000000..a5a41633af07 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt @@ -0,0 +1,25 @@ +// Test that we retransmit SYN-ACK with ACE and without +// AccECN options after +// SYN-ACK was lost and TCP moved to TCPS_SYN_RECEIVED + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 ++.002 > [noecn] SW. 0:0(0) ack 1 + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + +// We try to write with AccECN option ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P5. 1:101(100) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt new file mode 100644 index 000000000000..f3fe2f098966 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt @@ -0,0 +1,26 @@ +// Test TS progress filtering +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt new file mode 100644 index 000000000000..1446799d2481 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt @@ -0,0 +1,25 @@ +// Test TS progress filtering +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 ++.002 > SW. 0:0(0) ack 1 ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%