mirror of
https://github.com/torvalds/linux.git
synced 2026-05-27 08:33:17 +02:00
tcp: fix stale per-CPU tcp_tw_isn leak enabling ISN prediction
The blamed commit moved the TIME_WAIT-derived ISN from the skb control
block to a per-CPU variable, assuming the value would always be consumed
by tcp_conn_request() for the same packet that wrote it. That assumption
is violated by multiple drop paths between the producer
(__this_cpu_write(tcp_tw_isn, isn) in tcp_v{4,6}_rcv()) and the consumer
(tcp_conn_request()):
- min_ttl / min_hopcount check
- xfrm policy check
- tcp_inbound_hash() MD5/AO mismatch
- tcp_filter() eBPF/SO_ATTACH_FILTER drop
- th->syn && th->fin discard in tcp_rcv_state_process() TCP_LISTEN
- psp_sk_rx_policy_check() in tcp_v{4,6}_do_rcv()
- tcp_checksum_complete() in tcp_v{4,6}_do_rcv()
- tcp_v{4,6}_cookie_check() returning NULL
When a packet is dropped on any of these paths, tcp_tw_isn is left set.
The next SYN processed on the same CPU then consumes the non-zero value in
tcp_conn_request(), receiving a potentially predictable ISN.
This patch moves tcp_tw_isn back to skb->cb[], getting rid of the per-cpu
variable.
Note that tcp_v{4,6}_fill_cb() do not set it; tcp_v{4,6}_rcv() writes it
explicitly right after calling them.
Very little impact on overall code size/complexity:
$ scripts/bloat-o-meter -t vmlinux.old vmlinux.new
add/remove: 0/0 grow/shrink: 2/1 up/down: 8/-15 (-7)
Function old new delta
tcp_v6_rcv 3038 3042 +4
tcp_v4_rcv 3035 3039 +4
tcp_conn_request 2938 2923 -15
Total: Before=24436060, After=24436053, chg -0.00%
Fixes: 41eecbd712 ("tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260519084611.2485277-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
99e22ddf4e
commit
1bbf0ced1d
|
|
@ -65,8 +65,6 @@ static inline void tcp_orphan_count_dec(void)
|
|||
this_cpu_dec(tcp_orphan_count);
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(u32, tcp_tw_isn);
|
||||
|
||||
void tcp_time_wait(struct sock *sk, int state, int timeo);
|
||||
|
||||
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
|
||||
|
|
@ -1102,10 +1100,13 @@ struct tcp_skb_cb {
|
|||
__u32 seq; /* Starting sequence number */
|
||||
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
|
||||
union {
|
||||
/* Note :
|
||||
/* Notes :
|
||||
* tcp_tw_isn is used in input path only
|
||||
* (isn chosen by tcp_timewait_state_process())
|
||||
* tcp_gso_segs/size are used in write queue only,
|
||||
* cf tcp_skb_pcount()/tcp_skb_mss()
|
||||
*/
|
||||
u32 tcp_tw_isn;
|
||||
struct {
|
||||
u16 tcp_gso_segs;
|
||||
u16 tcp_gso_size;
|
||||
|
|
|
|||
|
|
@ -299,9 +299,6 @@ enum {
|
|||
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
|
||||
|
||||
DEFINE_PER_CPU(u32, tcp_tw_isn);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
|
||||
|
||||
long sysctl_tcp_mem[3] __read_mostly;
|
||||
|
||||
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
|
||||
|
|
|
|||
|
|
@ -7589,6 +7589,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
|
|||
struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct tcp_fastopen_cookie foc = { .len = -1 };
|
||||
u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
|
||||
struct tcp_options_received tmp_opt;
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct net *net = sock_net(sk);
|
||||
|
|
@ -7599,20 +7600,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
|
|||
struct dst_entry *dst;
|
||||
struct flowi fl;
|
||||
u8 syncookies;
|
||||
u32 isn;
|
||||
|
||||
#ifdef CONFIG_TCP_AO
|
||||
const struct tcp_ao_hdr *aoh;
|
||||
#endif
|
||||
|
||||
isn = __this_cpu_read(tcp_tw_isn);
|
||||
if (isn) {
|
||||
/* TW buckets are converted to open requests without
|
||||
* limitations, they conserve resources and peer is
|
||||
* evidently real one.
|
||||
*/
|
||||
__this_cpu_write(tcp_tw_isn, 0);
|
||||
} else {
|
||||
/* If isn is non-zero, this SYN originally matched a TIME_WAIT socket.
|
||||
* TW sockets are converted to open requests without limitations,
|
||||
* we skip the queue limits and syncookie checks in the block below.
|
||||
*/
|
||||
if (!isn) {
|
||||
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
|
||||
|
||||
if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
|
||||
|
|
|
|||
|
|
@ -2198,6 +2198,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||
}
|
||||
}
|
||||
|
||||
isn = 0;
|
||||
process:
|
||||
if (static_branch_unlikely(&ip4_min_ttl)) {
|
||||
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
|
||||
|
|
@ -2227,6 +2228,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||
th = (const struct tcphdr *)skb->data;
|
||||
iph = ip_hdr(skb);
|
||||
tcp_v4_fill_cb(skb, iph, th);
|
||||
TCP_SKB_CB(skb)->tcp_tw_isn = isn;
|
||||
|
||||
skb->dev = NULL;
|
||||
|
||||
|
|
@ -2313,7 +2315,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||
sk = sk2;
|
||||
tcp_v4_restore_cb(skb);
|
||||
refcounted = false;
|
||||
__this_cpu_write(tcp_tw_isn, isn);
|
||||
goto process;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1839,6 +1839,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
|
|||
}
|
||||
}
|
||||
|
||||
isn = 0;
|
||||
process:
|
||||
if (static_branch_unlikely(&ip6_min_hopcount)) {
|
||||
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
|
||||
|
|
@ -1868,6 +1869,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
|
|||
th = (const struct tcphdr *)skb->data;
|
||||
hdr = ipv6_hdr(skb);
|
||||
tcp_v6_fill_cb(skb, hdr, th);
|
||||
TCP_SKB_CB(skb)->tcp_tw_isn = isn;
|
||||
|
||||
skb->dev = NULL;
|
||||
|
||||
|
|
@ -1956,7 +1958,6 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
|
|||
sk = sk2;
|
||||
tcp_v6_restore_cb(skb);
|
||||
refcounted = false;
|
||||
__this_cpu_write(tcp_tw_isn, isn);
|
||||
goto process;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user