tcp: fix stale per-CPU tcp_tw_isn leak enabling ISN prediction

Blamed commit moved the TIME_WAIT-derived ISN from the skb control
block to a per-CPU variable, assuming the value would always be consumed
by tcp_conn_request() for the same packet that wrote it. That assumption
is violated by multiple drop paths between the producer
(__this_cpu_write(tcp_tw_isn, isn) in tcp_v{4,6}_rcv()) and the consumer
(tcp_conn_request()):

 - min_ttl / min_hopcount check
 - xfrm policy check
 - tcp_inbound_hash() MD5/AO mismatch
 - tcp_filter() eBPF/SO_ATTACH_FILTER drop
 - th->syn && th->fin discard in tcp_rcv_state_process() TCP_LISTEN
 - psp_sk_rx_policy_check() in tcp_v{4,6}_do_rcv()
 - tcp_checksum_complete() in tcp_v{4,6}_do_rcv()
 - tcp_v{4,6}_cookie_check() returning NULL

When a packet is dropped on any of these paths, tcp_tw_isn is left set.

The next SYN processed on the same CPU then consumes the non zero value in
tcp_conn_request(), receiving a potentially predictable ISN.

This patch moves back tcp_tw_isn to skb->cb[], getting rid of the per-cpu
variable.

Note that tcp_v{4,6}_fill_cb() do not set it.

Very litle impact on overall code size/complexity:

$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/0 grow/shrink: 2/1 up/down: 8/-15 (-7)
Function                                     old     new   delta
tcp_v6_rcv                                  3038    3042      +4
tcp_v4_rcv                                  3035    3039      +4
tcp_conn_request                            2938    2923     -15
Total: Before=24436060, After=24436053, chg -0.00%

Fixes: 41eecbd712 ("tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260519084611.2485277-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Eric Dumazet 2026-05-19 08:46:11 +00:00 committed by Jakub Kicinski
parent 99e22ddf4e
commit 1bbf0ced1d
5 changed files with 14 additions and 17 deletions

View File

@ -65,8 +65,6 @@ static inline void tcp_orphan_count_dec(void)
this_cpu_dec(tcp_orphan_count);
}
DECLARE_PER_CPU(u32, tcp_tw_isn);
void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@ -1102,10 +1100,13 @@ struct tcp_skb_cb {
__u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
union {
/* Note :
/* Notes :
* tcp_tw_isn is used in input path only
* (isn chosen by tcp_timewait_state_process())
* tcp_gso_segs/size are used in write queue only,
* cf tcp_skb_pcount()/tcp_skb_mss()
*/
u32 tcp_tw_isn;
struct {
u16 tcp_gso_segs;
u16 tcp_gso_size;

View File

@ -299,9 +299,6 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
DEFINE_PER_CPU(u32, tcp_tw_isn);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly;
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);

View File

@ -7589,6 +7589,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
const struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
@ -7599,20 +7600,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
isn = __this_cpu_read(tcp_tw_isn);
if (isn) {
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
__this_cpu_write(tcp_tw_isn, 0);
} else {
/* If isn is non-zero, this SYN originally matched a TIME_WAIT socket.
* TW sockets are converted to open requests without limitations,
* we skip the queue limits and syncookie checks in the block below.
*/
if (!isn) {
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {

View File

@ -2198,6 +2198,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
}
}
isn = 0;
process:
if (static_branch_unlikely(&ip4_min_ttl)) {
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
@ -2227,6 +2228,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
TCP_SKB_CB(skb)->tcp_tw_isn = isn;
skb->dev = NULL;
@ -2313,7 +2315,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}

View File

@ -1839,6 +1839,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
}
}
isn = 0;
process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
@ -1868,6 +1869,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
TCP_SKB_CB(skb)->tcp_tw_isn = isn;
skb->dev = NULL;
@ -1956,7 +1958,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}