From 267bf3cf9a6f0ffb98b8afd983c1950e835f07c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:06 +0000 Subject: [PATCH 01/14] tcp: annotate data-races in tcp_get_info_chrono_stats() tcp_get_timestamping_opt_stats() does not own the socket lock, this is intentional. It calls tcp_get_info_chrono_stats() while other threads could change chrono fields in tcp_chrono_set(). I do not think we need coherent TCP socket state snapshot in tcp_get_timestamping_opt_stats(), I chose to only add annotations to keep KCSAN happy. Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 10 +++++++--- net/ipv4/tcp.c | 14 ++++++++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dfa52ceefd23..674af493882c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2208,10 +2208,14 @@ static inline void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new const u32 now = tcp_jiffies32; enum tcp_chrono old = tp->chrono_type; + /* Following WRITE_ONCE()s pair with READ_ONCE()s in + * tcp_get_info_chrono_stats(). + */ if (old > TCP_CHRONO_UNSPEC) - tp->chrono_stat[old - 1] += now - tp->chrono_start; - tp->chrono_start = now; - tp->chrono_type = new; + WRITE_ONCE(tp->chrono_stat[old - 1], + tp->chrono_stat[old - 1] + now - tp->chrono_start); + WRITE_ONCE(tp->chrono_start, now); + WRITE_ONCE(tp->chrono_type, new); } static inline void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1a494d18c5fd..7b7812cb710f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4191,12 +4191,18 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) { u64 stats[__TCP_CHRONO_MAX], total = 0; - enum tcp_chrono i; + enum tcp_chrono i, cur; + /* Following READ_ONCE()s pair with WRITE_ONCE()s in tcp_chrono_set(). + * This is because socket lock might not be owned by us at this point. + * This is best effort, tcp_get_timestamping_opt_stats() can + * see wrong values. A real fix would be too costly for TCP fast path. + */ + cur = READ_ONCE(tp->chrono_type); for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) { - stats[i] = tp->chrono_stat[i - 1]; - if (i == tp->chrono_type) - stats[i] += tcp_jiffies32 - tp->chrono_start; + stats[i] = READ_ONCE(tp->chrono_stat[i - 1]); + if (i == cur) + stats[i] += tcp_jiffies32 - READ_ONCE(tp->chrono_start); stats[i] *= USEC_PER_SEC / HZ; total += stats[i]; } From 21e92a38cfd891538598ba8f805e0165a820d532 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:07 +0000 Subject: [PATCH 02/14] tcp: add data-race annotations around tp->data_segs_out and tp->total_retrans tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: 7e98102f4897 ("tcp: record pkts sent and retransmistted") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_output.c | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7b7812cb710f..e39e0734d958 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4434,9 +4434,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, info.tcpi_sndbuf_limited, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT, - tp->data_segs_out, TCP_NLA_PAD); + READ_ONCE(tp->data_segs_out), TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, - tp->total_retrans, TCP_NLA_PAD); + READ_ONCE(tp->total_retrans), TCP_NLA_PAD); rate = READ_ONCE(sk->sk_pacing_rate); rate64 = (rate != ~0UL) ? rate : ~0ULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8e99687526a6..d8e8bba2d03a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1688,7 +1688,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); - tp->data_segs_out += tcp_skb_pcount(skb); + WRITE_ONCE(tp->data_segs_out, + tp->data_segs_out + tcp_skb_pcount(skb)); tp->bytes_sent += skb->len - tcp_header_size; } @@ -3642,7 +3643,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - tp->total_retrans += segs; + WRITE_ONCE(tp->total_retrans, tp->total_retrans + segs); tp->bytes_retrans += skb->len; /* make sure skb->data is aligned on arches that require it @@ -4646,7 +4647,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) * However in this case, we are dealing with a passive fastopen * socket thus we can change total_retrans value. */ - tcp_sk_rw(sk)->total_retrans++; + WRITE_ONCE(tcp_sk_rw(sk)->total_retrans, + tcp_sk_rw(sk)->total_retrans + 1); } trace_tcp_retransmit_synack(sk, req); WRITE_ONCE(req->num_retrans, req->num_retrans + 1); From 829ba1f329cb7cbd56d599a6d225997fba66dc32 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:08 +0000 Subject: [PATCH 03/14] tcp: add data-races annotations around tp->reordering, tp->snd_cwnd tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE(), WRITE_ONCE() data_race() annotations to keep KCSAN happy. Fixes: bb7c19f96012 ("tcp: add related fields into SCM_TIMESTAMPING_OPT_STATS") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 8 ++++---- net/ipv4/tcp_input.c | 14 ++++++++------ net/ipv4/tcp_metrics.c | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 674af493882c..ecbadcb3a744 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1513,7 +1513,7 @@ static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) { WARN_ON_ONCE((int)val <= 0); - tp->snd_cwnd = val; + WRITE_ONCE(tp->snd_cwnd, val); } static inline bool tcp_in_slow_start(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e39e0734d958..24ba80d244b1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4445,13 +4445,13 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, rate64 = tcp_compute_delivery_rate(tp); nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); - nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp)); - nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); - nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); + nla_put_u32(stats, TCP_NLA_SND_CWND, READ_ONCE(tp->snd_cwnd)); + nla_put_u32(stats, TCP_NLA_REORDERING, READ_ONCE(tp->reordering)); + nla_put_u32(stats, TCP_NLA_MIN_RTT, data_race(tcp_min_rtt(tp))); nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, READ_ONCE(inet_csk(sk)->icsk_retransmits)); - nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); + nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, data_race(!!tp->rate_app_limited)); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 021f745747c5..6bb6bf049a35 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1293,8 +1293,9 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - tp->reordering = min_t(u32, (metric + mss - 1) / mss, - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); + WRITE_ONCE(tp->reordering, + min_t(u32, (metric + mss - 1) / mss, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering))); } /* This exciting event is worth to be remembered. 8) */ @@ -2439,8 +2440,9 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) if (!tcp_limit_reno_sacked(tp)) return; - tp->reordering = min_t(u32, tp->packets_out + addend, - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); + WRITE_ONCE(tp->reordering, + min_t(u32, tp->packets_out + addend, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering))); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } @@ -2579,8 +2581,8 @@ void tcp_enter_loss(struct sock *sk) reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && tp->sacked_out >= reordering) - tp->reordering = min_t(unsigned int, tp->reordering, - reordering); + WRITE_ONCE(tp->reordering, + min_t(unsigned int, tp->reordering, reordering)); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 06b1d5d3b6df..7a9d6d9006f6 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -496,7 +496,7 @@ void tcp_init_metrics(struct sock *sk) } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val && tp->reordering != val) - tp->reordering = val; + WRITE_ONCE(tp->reordering, val); crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); From fd571afb05ebaeac5d8f09460a0640d4cf6755f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:09 +0000 Subject: [PATCH 04/14] tcp: annotate data-races around tp->snd_ssthresh tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: 7156d194a077 ("tcp: add snd_ssthresh stat in SCM_TIMESTAMPING_OPT_STATS") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_bbr.c | 6 +++--- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cdg.c | 4 ++-- net/ipv4/tcp_cubic.c | 6 +++--- net/ipv4/tcp_dctcp.c | 2 +- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_metrics.c | 4 ++-- net/ipv4/tcp_nv.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv4/tcp_vegas.c | 9 +++++---- net/ipv4/tcp_westwood.c | 4 ++-- net/ipv4/tcp_yeah.c | 3 ++- 14 files changed, 32 insertions(+), 30 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index fcfcb72663ca..3b5609fb96de 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5396,7 +5396,7 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, if (val <= 0) return -EINVAL; tp->snd_cwnd_clamp = val; - tp->snd_ssthresh = val; + WRITE_ONCE(tp->snd_ssthresh, val); break; case TCP_BPF_DELACK_MAX: timeout = usecs_to_jiffies(val); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 24ba80d244b1..802a9ea05211 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3425,7 +3425,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_rto = TCP_TIMEOUT_INIT; WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN); WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX); - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH); tcp_snd_cwnd_set(tp, TCP_INIT_CWND); tp->snd_cwnd_cnt = 0; tp->is_cwnd_limited = 0; @@ -4452,7 +4452,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, READ_ONCE(inet_csk(sk)->icsk_retransmits)); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, data_race(!!tp->rate_app_limited)); - nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); + nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, READ_ONCE(tp->snd_ssthresh)); nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 1ddc20a399b0..aec7805b1d37 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -897,8 +897,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { bbr->mode = BBR_DRAIN; /* drain queue we created */ - tcp_sk(sk)->snd_ssthresh = - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= @@ -1043,7 +1043,7 @@ __bpf_kfunc static void bbr_init(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + WRITE_ONCE(tp->snd_ssthresh, TCP_INFINITE_SSTHRESH); bbr->rtt_cnt = 0; bbr->next_rtt_delivered = tp->delivered; bbr->prev_ca_state = TCP_CA_Open; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 58358bf92e1b..65444ff14241 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -74,7 +74,7 @@ static void bictcp_init(struct sock *sk) bictcp_reset(ca); if (initial_ssthresh) - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh); } /* diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index ceabfd690a29..0812c390aee5 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -162,7 +162,7 @@ static void tcp_cdg_hystart_update(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); return; } } @@ -181,7 +181,7 @@ static void tcp_cdg_hystart_update(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } } } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ab78b5ae8d0e..119bf8cbb007 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -136,7 +136,7 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk) bictcp_hystart_reset(sk); if (!hystart && initial_ssthresh) - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; + WRITE_ONCE(tcp_sk(sk)->snd_ssthresh, initial_ssthresh); } __bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk) @@ -420,7 +420,7 @@ static void hystart_update(struct sock *sk, u32 delay) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } } } @@ -440,7 +440,7 @@ static void hystart_update(struct sock *sk, u32 delay) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, tcp_snd_cwnd(tp)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } } } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 96c99999e09d..274e628e7cf8 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -177,7 +177,7 @@ static void dctcp_react_to_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); ca->loss_cwnd = tcp_snd_cwnd(tp); - tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U); + WRITE_ONCE(tp->snd_ssthresh, max(tcp_snd_cwnd(tp) >> 1U, 2U)); } __bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6bb6bf049a35..c6361447535f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2567,7 +2567,7 @@ void tcp_enter_loss(struct sock *sk) (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->prior_cwnd = tcp_snd_cwnd(tp); - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, icsk->icsk_ca_ops->ssthresh(sk)); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); } @@ -2860,7 +2860,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk)); if (tp->prior_ssthresh > tp->snd_ssthresh) { - tp->snd_ssthresh = tp->prior_ssthresh; + WRITE_ONCE(tp->snd_ssthresh, tp->prior_ssthresh); tcp_ecn_withdraw_cwr(tp); } } @@ -2978,7 +2978,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tp->prior_cwnd = tcp_snd_cwnd(tp); tp->prr_delivered = 0; tp->prr_out = 0; - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, inet_csk(sk)->icsk_ca_ops->ssthresh(sk)); tcp_ecn_queue_cwr(tp); } @@ -3120,7 +3120,7 @@ static void tcp_non_congestion_loss_retransmit(struct sock *sk) if (icsk->icsk_ca_state != TCP_CA_Loss) { tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); tp->prior_ssthresh = 0; tp->undo_marker = 0; tcp_set_ca_state(sk, TCP_CA_Loss); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7a9d6d9006f6..dc0c081fc1f3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -490,9 +490,9 @@ void tcp_init_metrics(struct sock *sk) val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val) { - tp->snd_ssthresh = val; + WRITE_ONCE(tp->snd_ssthresh, val); if (tp->snd_ssthresh > tp->snd_cwnd_clamp) - tp->snd_ssthresh = tp->snd_cwnd_clamp; + WRITE_ONCE(tp->snd_ssthresh, tp->snd_cwnd_clamp); } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val && tp->reordering != val) diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index a60662f4bdf9..f345897a68df 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -396,8 +396,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) /* We have enough data to determine we are congested */ ca->nv_allow_cwnd_growth = 0; - tp->snd_ssthresh = - (nv_ssthresh_factor * max_win) >> 3; + WRITE_ONCE(tp->snd_ssthresh, + (nv_ssthresh_factor * max_win) >> 3); if (tcp_snd_cwnd(tp) - max_win > 2) { /* gap > 2, we do exponential cwnd decrease */ int dec; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d8e8bba2d03a..2663505a0dd7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -171,7 +171,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) tcp_ca_event(sk, CA_EVENT_CWND_RESTART); - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); restart_cwnd = min(restart_cwnd, cwnd); while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) @@ -2143,7 +2143,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 win_used = max(tp->snd_cwnd_used, init_win); if (win_used < tcp_snd_cwnd(tp)) { - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1); } tp->snd_cwnd_used = 0; diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 950a66966059..574453af6bc0 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -245,7 +245,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) */ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), (u32)target_cwnd + 1)); - tp->snd_ssthresh = tcp_vegas_ssthresh(tp); + WRITE_ONCE(tp->snd_ssthresh, + tcp_vegas_ssthresh(tp)); } else if (tcp_in_slow_start(tp)) { /* Slow start. */ @@ -261,8 +262,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) * we slow down. */ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); - tp->snd_ssthresh - = tcp_vegas_ssthresh(tp); + WRITE_ONCE(tp->snd_ssthresh, + tcp_vegas_ssthresh(tp)); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. @@ -280,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); - tp->snd_ssthresh = tcp_current_ssthresh(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_current_ssthresh(sk)); } /* Wipe the slate clean for the next RTT. */ diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index c6e97141eef2..b5a42adfd6ca 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -244,11 +244,11 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) switch (event) { case CA_EVENT_COMPLETE_CWR: - tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk)); tcp_snd_cwnd_set(tp, tp->snd_ssthresh); break; case CA_EVENT_LOSS: - tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + WRITE_ONCE(tp->snd_ssthresh, tcp_westwood_bw_rttmin(sk)); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; break; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index b22b3dccd05e..9e581154f18f 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -147,7 +147,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), yeah->reno_count)); - tp->snd_ssthresh = tcp_snd_cwnd(tp); + WRITE_ONCE(tp->snd_ssthresh, + tcp_snd_cwnd(tp)); } if (yeah->reno_count <= 2) From faa886ad3ce5fc8f5156493491fe189b2b726bc9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:10 +0000 Subject: [PATCH 05/14] tcp: annotate data-races around tp->delivered and tp->delivered_ce tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: feb5f2ec6464 ("tcp: export packets delivery info") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp_ecn.h | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index e9a933641636..865d5c5a7718 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -181,7 +181,7 @@ static inline void tcp_accecn_third_ack(struct sock *sk, tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) { if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) && !tp->delivered_ce) - tp->delivered_ce++; + WRITE_ONCE(tp->delivered_ce, 1); } break; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 802a9ea05211..0aabd02d4496 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4453,8 +4453,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, READ_ONCE(inet_csk(sk)->icsk_retransmits)); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, data_race(!!tp->rate_app_limited)); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, READ_ONCE(tp->snd_ssthresh)); - nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); - nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); + nla_put_u32(stats, TCP_NLA_DELIVERED, READ_ONCE(tp->delivered)); + nla_put_u32(stats, TCP_NLA_DELIVERED_CE, READ_ONCE(tp->delivered_ce)); nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c6361447535f..63ff89210a72 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -476,14 +476,14 @@ static bool tcp_accecn_process_option(struct tcp_sock *tp, static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count) { - tp->delivered_ce += ecn_count; + WRITE_ONCE(tp->delivered_ce, tp->delivered_ce + ecn_count); } /* Updates the delivered and delivered_ce counts */ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, bool ece_ack) { - tp->delivered += delivered; + WRITE_ONCE(tp->delivered, tp->delivered + delivered); if (tcp_ecn_mode_rfc3168(tp) && ece_ack) tcp_count_delivered_ce(tp, delivered); } @@ -6779,7 +6779,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); /* SYN-data is counted as two separate packets in tcp_ack() */ if (tp->delivered > 1) - --tp->delivered; + WRITE_ONCE(tp->delivered, tp->delivered - 1); } tcp_fastopen_add_skb(sk, synack); @@ -7212,7 +7212,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) SKB_DR_SET(reason, NOT_SPECIFIED); switch (sk->sk_state) { case TCP_SYN_RECV: - tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */ + WRITE_ONCE(tp->delivered, tp->delivered + 1); /* SYN-ACK delivery isn't tracked in tcp_ack */ if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); From 124199444de467767175a9004e1574dc42523e62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:11 +0000 Subject: [PATCH 06/14] tcp: add data-race annotations for TCP_NLA_SNDQ_SIZE tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: 87ecc95d81d9 ("tcp: add send queue size stat in SCM_TIMESTAMPING_OPT_STATS") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0aabd02d4496..729936d13a5c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4456,7 +4456,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u32(stats, TCP_NLA_DELIVERED, READ_ONCE(tp->delivered)); nla_put_u32(stats, TCP_NLA_DELIVERED_CE, READ_ONCE(tp->delivered_ce)); - nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); + nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, + max_t(int, 0, + READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_una))); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 63ff89210a72..edb5013873e0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3912,7 +3912,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) sock_owned_by_me((struct sock *)tp); tp->bytes_acked += delta; tcp_snd_sne_update(tp, ack); - tp->snd_una = ack; + WRITE_ONCE(tp->snd_una, ack); } static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) @@ -7240,7 +7240,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (sk->sk_socket) sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + WRITE_ONCE(tp->snd_una, TCP_SKB_CB(skb)->ack_seq); tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2663505a0dd7..9f83c7e4acab 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4153,7 +4153,7 @@ static void tcp_connect_init(struct sock *sk) tp->snd_wnd = 0; tcp_init_wl(tp, 0); tcp_write_queue_purge(sk); - tp->snd_una = tp->write_seq; + WRITE_ONCE(tp->snd_una, tp->write_seq); tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; WRITE_ONCE(tp->snd_nxt, tp->write_seq); From ee43e957ce2ec77b2ec47fef28f3c0df6ab01a31 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:12 +0000 Subject: [PATCH 07/14] tcp: annotate data-races around tp->bytes_sent tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: ba113c3aa79a ("tcp: add data bytes sent stats") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 729936d13a5c..f999b86851cd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4461,7 +4461,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_una))); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); - nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent, + nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, READ_ONCE(tp->bytes_sent), TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, TCP_NLA_PAD); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9f83c7e4acab..87af4731df87 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1690,7 +1690,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcp_event_data_sent(tp, sk); WRITE_ONCE(tp->data_segs_out, tp->data_segs_out + tcp_skb_pcount(skb)); - tp->bytes_sent += skb->len - tcp_header_size; + WRITE_ONCE(tp->bytes_sent, + tp->bytes_sent + skb->len - tcp_header_size); } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) From 5efc7b9f7cbd43401f1af81d3d7f2be00f93390d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:13 +0000 Subject: [PATCH 08/14] tcp: annotate data-races around tp->bytes_retrans tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: fb31c9b9f6c8 ("tcp: add data bytes retransmitted stats") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_output.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f999b86851cd..8c84639dc54b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4463,8 +4463,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, READ_ONCE(tp->bytes_sent), TCP_NLA_PAD); - nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, - TCP_NLA_PAD); + nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, + READ_ONCE(tp->bytes_retrans), TCP_NLA_PAD); nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 87af4731df87..f9d8755705f7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3645,7 +3645,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); WRITE_ONCE(tp->total_retrans, tp->total_retrans + segs); - tp->bytes_retrans += skb->len; + WRITE_ONCE(tp->bytes_retrans, tp->bytes_retrans + skb->len); /* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom From a984705ca88b976bf1087978fd98b7f3993da88c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:14 +0000 Subject: [PATCH 09/14] tcp: annotate data-races around tp->dsack_dups tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: 7e10b6554ff2 ("tcp: add dsack blocks received stats") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8c84639dc54b..57c4dcc8bfe9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4465,7 +4465,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, READ_ONCE(tp->bytes_retrans), TCP_NLA_PAD); - nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); + nla_put_u32(stats, TCP_NLA_DSACK_DUPS, READ_ONCE(tp->dsack_dups)); nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index edb5013873e0..65b3ecc6be4b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1246,7 +1246,7 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq) state->flag |= FLAG_DSACK_TLP; - tp->dsack_dups += dup_segs; + WRITE_ONCE(tp->dsack_dups, tp->dsack_dups + dup_segs); /* Skip the DSACK if dup segs weren't retransmitted by sender */ if (tp->dsack_dups > tp->total_retrans) return 0; From 62585690e6b2a112c408fe25f142b246ac833c42 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:15 +0000 Subject: [PATCH 10/14] tcp: annotate data-races around tp->reord_seen tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: 7ec65372ca53 ("tcp: add stat of data packet reordering events") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-11-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 57c4dcc8bfe9..39a4b06e36bb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4466,7 +4466,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, READ_ONCE(tp->bytes_retrans), TCP_NLA_PAD); nla_put_u32(stats, TCP_NLA_DSACK_DUPS, READ_ONCE(tp->dsack_dups)); - nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); + nla_put_u32(stats, TCP_NLA_REORD_SEEN, READ_ONCE(tp->reord_seen)); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 65b3ecc6be4b..896a5a5a6b1a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1299,7 +1299,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, } /* This exciting event is worth to be remembered. 8) */ - tp->reord_seen++; + WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1); NET_INC_STATS(sock_net(sk), ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } @@ -2443,7 +2443,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) WRITE_ONCE(tp->reordering, min_t(u32, tp->packets_out + addend, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering))); - tp->reord_seen++; + WRITE_ONCE(tp->reord_seen, tp->reord_seen + 1); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } From 290b693ce7c9d48588d88b15a782a3efc6fa036b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:16 +0000 Subject: [PATCH 11/14] tcp: annotate data-races around tp->srtt_us tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: e8bd8fca6773 ("tcp: add SRTT to SCM_TIMESTAMPING_OPT_STATS") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-12-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 5 +++-- net/ipv4/tcp_input.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 39a4b06e36bb..541bd0d2d8c4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3623,7 +3623,8 @@ static void tcp_enable_tx_delay(struct sock *sk, int val) if (delta && sk->sk_state == TCP_ESTABLISHED) { s64 srtt = (s64)tp->srtt_us + delta; - tp->srtt_us = clamp_t(s64, srtt, 1, ~0U); + WRITE_ONCE(tp->srtt_us, + clamp_t(s64, srtt, 1, ~0U)); /* Note: does not deal with non zero icsk_backoff */ tcp_set_rto(sk); @@ -4467,7 +4468,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, READ_ONCE(tp->bytes_retrans), TCP_NLA_PAD); nla_put_u32(stats, TCP_NLA_DSACK_DUPS, READ_ONCE(tp->dsack_dups)); nla_put_u32(stats, TCP_NLA_REORD_SEEN, READ_ONCE(tp->reord_seen)); - nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); + nla_put_u32(stats, TCP_NLA_SRTT, READ_ONCE(tp->srtt_us) >> 3); nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, max_t(int, 0, tp->write_seq - tp->snd_nxt)); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 896a5a5a6b1a..e04ae105893c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1132,7 +1132,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tcp_bpf_rtt(sk, mrtt_us, srtt); } - tp->srtt_us = max(1U, srtt); + WRITE_ONCE(tp->srtt_us, max(1U, srtt)); } void tcp_update_pacing_rate(struct sock *sk) From 71c675358b711bbfd8528949249419dc2dfa4ce1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:17 +0000 Subject: [PATCH 12/14] tcp: annotate data-races around tp->timeout_rehash tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: 32efcc06d2a1 ("tcp: export count for rehash attempts") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-13-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_timer.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541bd0d2d8c4..192e95b71ce9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4469,7 +4469,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u32(stats, TCP_NLA_DSACK_DUPS, READ_ONCE(tp->dsack_dups)); nla_put_u32(stats, TCP_NLA_REORD_SEEN, READ_ONCE(tp->reord_seen)); nla_put_u32(stats, TCP_NLA_SRTT, READ_ONCE(tp->srtt_us) >> 3); - nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); + nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, + READ_ONCE(tp->timeout_rehash)); nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, max_t(int, 0, tp->write_seq - tp->snd_nxt)); nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ea99988795e7..8d791a954cd6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -297,7 +297,7 @@ static int tcp_write_timeout(struct sock *sk) } if (sk_rethink_txhash(sk)) { - tp->timeout_rehash++; + WRITE_ONCE(tp->timeout_rehash, tp->timeout_rehash + 1); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); } From 3a63b3d160560ef51e43fb4c880a5cde8078053c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:18 +0000 Subject: [PATCH 13/14] tcp: annotate data-races around (tp->write_seq - tp->snd_nxt) tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() annotations to keep KCSAN happy. WRITE_ONCE() annotations are already present. Fixes: e08ab0b377a1 ("tcp: add bytes not sent to SCM_TIMESTAMPING_OPT_STATS") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-14-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 192e95b71ce9..68894c03f262 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4472,7 +4472,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, READ_ONCE(tp->timeout_rehash)); nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, - max_t(int, 0, tp->write_seq - tp->snd_nxt)); + max_t(int, 0, + READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt))); nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, TCP_NLA_PAD); if (ack_skb) From 9e89b9d03a2d2e30dcca166d5af52f9a8eceab25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2026 20:03:19 +0000 Subject: [PATCH 14/14] tcp: annotate data-races around tp->plb_rehash tcp_get_timestamping_opt_stats() intentionally runs lockless, we must add READ_ONCE() and WRITE_ONCE() annotations to keep KCSAN happy. Fixes: 29c1c44646ae ("tcp: add u32 counter in tcp_sock and an SNMP counter for PLB") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260416200319.3608680-15-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_plb.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68894c03f262..7fbf2fca5eb2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4480,7 +4480,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, nla_put_u8(stats, TCP_NLA_TTL, tcp_skb_ttl_or_hop_limit(ack_skb)); - nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash); + nla_put_u32(stats, TCP_NLA_REHASH, + READ_ONCE(tp->plb_rehash) + READ_ONCE(tp->timeout_rehash)); return stats; } diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c index 68ccdb9a5412..c11a0cd3f8fe 100644 --- a/net/ipv4/tcp_plb.c +++ b/net/ipv4/tcp_plb.c @@ -80,7 +80,7 @@ void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb) sk_rethink_txhash(sk); plb->consec_cong_rounds = 0; - tcp_sk(sk)->plb_rehash++; + WRITE_ONCE(tcp_sk(sk)->plb_rehash, tcp_sk(sk)->plb_rehash + 1); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH); } EXPORT_SYMBOL_GPL(tcp_plb_check_rehash);