Merge branch 'tcp-__tcp_close-changes'

Eric Dumazet says:

====================
tcp: __tcp_close() changes

First patch fixes a rare bug.

Second patch adds a corresponding packetdrill test.

Third patch is a small optimization.
====================

Link: https://patch.msgid.link/20250903084720.1168904-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-09-04 19:13:45 -07:00
commit abfa70b380
2 changed files with 39 additions and 6 deletions

View File

@ -3099,8 +3099,8 @@ bool tcp_check_oom(const struct sock *sk, int shift)
void __tcp_close(struct sock *sk, long timeout)
{
bool data_was_unread = false;
struct sk_buff *skb;
int data_was_unread = 0;
int state;
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
@ -3118,13 +3118,14 @@ void __tcp_close(struct sock *sk, long timeout)
* descriptor close, not protocol-sourced closes, because the
* reader process may not have drained the data yet!
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
len--;
data_was_unread += len;
__kfree_skb(skb);
end_seq--;
if (after(end_seq, tcp_sk(sk)->copied_seq))
data_was_unread = true;
tcp_eat_recv_skb(sk, skb);
}
/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */

View File

@ -0,0 +1,32 @@
// SPDX-License-Identifier: GPL-2.0
--mss=1000
`./defaults.sh`
// Initialize connection
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+.1 < . 1:1(0) ack 1 win 32792
+0 accept(3, ..., ...) = 4
+0 < . 1:1001(1000) ack 1 win 32792
+0 > . 1:1(0) ack 1001
+0 read(4, ..., 1000) = 1000
// resend the payload + a FIN
+0 < F. 1:1001(1000) ack 1 win 32792
// Why do we have a delay and no dsack ?
+0~+.04 > . 1:1(0) ack 1002
+0 close(4) = 0
// According to RFC 2525, section 2.17
// we should _not_ send an RST here, because there was no data to consume.
+0 > F. 1:1(0) ack 1002