mirror of
https://github.com/torvalds/linux.git
synced 2026-05-21 21:37:25 +02:00
tcp: remove obsolete and unused RFC3517/RFC6675 loss recovery code
RACK-TLP loss detection has been enabled as the default loss detection
algorithm for Linux TCP since 2018, in:
commit b38a51fec1 ("tcp: disable RFC6675 loss detection")
In case users ran into unexpected bugs or performance regressions,
that commit allowed Linux system administrators to revert to using
RFC3517/RFC6675 loss recovery by setting net.ipv4.tcp_recovery to 0.
In the seven years since 2018, our team has not heard reports of
anyone reverting Linux TCP to use RFC3517/RFC6675 loss recovery, and
we can't find any record in web searches of such a revert.
RACK-TLP was published as a standards-track RFC, RFC8985, in February
2021.
Several other major TCP implementations have default-enabled RACK-TLP
at this point as well.
RACK-TLP offers several significant performance advantages over
RFC3517/RFC6675 loss recovery, including much better performance in
the common cases of tail drops, lost retransmissions, and reordering.
It is now time to remove the obsolete and unused RFC3517/RFC6675 loss
recovery code. This will allow a substantial simplification of the
Linux TCP code base, and removes 12 bytes of state in every tcp_sock
for 64-bit machines (8 bytes on 32-bit machines).
To arrange the commits in reasonable sizes, this patch series is split
into 3 commits. The following 2 commits remove bookkeeping state and
code that is no longer needed after this removal of RFC3517/RFC6675
loss recovery.
Suggested-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250615001435.2390793-2-ncardwell.sw@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
90b4e1cf6d
commit
1c120191dc
|
|
@ -645,9 +645,11 @@ tcp_recovery - INTEGER
|
|||
features.
|
||||
|
||||
========= =============================================================
|
||||
RACK: 0x1 enables the RACK loss detection for fast detection of lost
|
||||
retransmissions and tail drops. It also subsumes and disables
|
||||
RFC6675 recovery for SACK connections.
|
||||
RACK: 0x1 enables RACK loss detection, for fast detection of lost
|
||||
retransmissions and tail drops, and resilience to
|
||||
reordering. currently, setting this bit to 0 has no
|
||||
effect, since RACK is the only supported loss detection
|
||||
algorithm.
|
||||
|
||||
RACK: 0x2 makes RACK's reordering window static (min_rtt/4).
|
||||
|
||||
|
|
|
|||
|
|
@ -2151,12 +2151,6 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
|
|||
tp->undo_retrans = -1;
|
||||
}
|
||||
|
||||
static bool tcp_is_rack(const struct sock *sk)
|
||||
{
|
||||
return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
|
||||
TCP_RACK_LOSS_DETECTION;
|
||||
}
|
||||
|
||||
/* If we detect SACK reneging, forget all SACK information
|
||||
* and reset tags completely, otherwise preserve SACKs. If receiver
|
||||
* dropped its ofo queue, we will know this due to reneging detection.
|
||||
|
|
@ -2182,8 +2176,7 @@ static void tcp_timeout_mark_lost(struct sock *sk)
|
|||
skb_rbtree_walk_from(skb) {
|
||||
if (is_reneg)
|
||||
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
|
||||
else if (tcp_is_rack(sk) && skb != head &&
|
||||
tcp_rack_skb_timeout(tp, skb, 0) > 0)
|
||||
else if (skb != head && tcp_rack_skb_timeout(tp, skb, 0) > 0)
|
||||
continue; /* Don't mark recently sent ones lost yet */
|
||||
tcp_mark_skb_lost(sk, skb);
|
||||
}
|
||||
|
|
@ -2264,22 +2257,6 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
|
||||
* counter when SACK is enabled (without SACK, sacked_out is used for
|
||||
* that purpose).
|
||||
*
|
||||
* With reordering, holes may still be in flight, so RFC3517 recovery
|
||||
* uses pure sacked_out (total number of SACKed segments) even though
|
||||
* it violates the RFC that uses duplicate ACKs, often these are equal
|
||||
* but when e.g. out-of-window ACKs or packet duplication occurs,
|
||||
* they differ. Since neither occurs due to loss, TCP should really
|
||||
* ignore them.
|
||||
*/
|
||||
static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
|
||||
{
|
||||
return tp->sacked_out + 1;
|
||||
}
|
||||
|
||||
/* Linux NewReno/SACK/ECN state machine.
|
||||
* --------------------------------------
|
||||
*
|
||||
|
|
@ -2332,13 +2309,7 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
|
|||
*
|
||||
* If the receiver supports SACK:
|
||||
*
|
||||
* RFC6675/3517: It is the conventional algorithm. A packet is
|
||||
* considered lost if the number of higher sequence packets
|
||||
* SACKed is greater than or equal the DUPACK thoreshold
|
||||
* (reordering). This is implemented in tcp_mark_head_lost and
|
||||
* tcp_update_scoreboard.
|
||||
*
|
||||
* RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm
|
||||
* RACK (RFC8985): RACK is a newer loss detection algorithm
|
||||
* (2017-) that checks timing instead of counting DUPACKs.
|
||||
* Essentially a packet is considered lost if it's not S/ACKed
|
||||
* after RTT + reordering_window, where both metrics are
|
||||
|
|
@ -2353,8 +2324,8 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
|
|||
* is lost (NewReno). This heuristics are the same in NewReno
|
||||
* and SACK.
|
||||
*
|
||||
* Really tricky (and requiring careful tuning) part of algorithm
|
||||
* is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
|
||||
* The really tricky (and requiring careful tuning) part of the algorithm
|
||||
* is hidden in the RACK code in tcp_recovery.c and tcp_xmit_retransmit_queue().
|
||||
* The first determines the moment _when_ we should reduce CWND and,
|
||||
* hence, slow down forward transmission. In fact, it determines the moment
|
||||
* when we decide that hole is caused by loss, rather than by a reorder.
|
||||
|
|
@ -2381,79 +2352,8 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
|
|||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
/* Trick#1: The loss is proven. */
|
||||
if (tp->lost_out)
|
||||
return true;
|
||||
|
||||
/* Not-A-Trick#2 : Classic rule... */
|
||||
if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Detect loss in event "A" above by marking head of queue up as lost.
|
||||
* For RFC3517 SACK, a segment is considered lost if it
|
||||
* has at least tp->reordering SACKed seqments above it; "packets" refers to
|
||||
* the maximum SACKed segments to pass before reaching this limit.
|
||||
*/
|
||||
static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct sk_buff *skb;
|
||||
int cnt;
|
||||
/* Use SACK to deduce losses of new sequences sent during recovery */
|
||||
const u32 loss_high = tp->snd_nxt;
|
||||
|
||||
WARN_ON(packets > tp->packets_out);
|
||||
skb = tp->lost_skb_hint;
|
||||
if (skb) {
|
||||
/* Head already handled? */
|
||||
if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
|
||||
return;
|
||||
cnt = tp->lost_cnt_hint;
|
||||
} else {
|
||||
skb = tcp_rtx_queue_head(sk);
|
||||
cnt = 0;
|
||||
}
|
||||
|
||||
skb_rbtree_walk_from(skb) {
|
||||
/* TODO: do this better */
|
||||
/* this is not the most efficient way to do this... */
|
||||
tp->lost_skb_hint = skb;
|
||||
tp->lost_cnt_hint = cnt;
|
||||
|
||||
if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
|
||||
break;
|
||||
|
||||
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
|
||||
cnt += tcp_skb_pcount(skb);
|
||||
|
||||
if (cnt > packets)
|
||||
break;
|
||||
|
||||
if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
|
||||
tcp_mark_skb_lost(sk, skb);
|
||||
|
||||
if (mark_head)
|
||||
break;
|
||||
}
|
||||
tcp_verify_left_out(tp);
|
||||
}
|
||||
|
||||
/* Account newly detected lost packet(s) */
|
||||
|
||||
static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
if (tcp_is_sack(tp)) {
|
||||
int sacked_upto = tp->sacked_out - tp->reordering;
|
||||
if (sacked_upto >= 0)
|
||||
tcp_mark_head_lost(sk, sacked_upto, 0);
|
||||
else if (fast_rexmit)
|
||||
tcp_mark_head_lost(sk, 1, 1);
|
||||
}
|
||||
/* Has loss detection marked at least one packet lost? */
|
||||
return tp->lost_out != 0;
|
||||
}
|
||||
|
||||
static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
|
||||
|
|
@ -2990,17 +2890,8 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
|
|||
*rexmit = REXMIT_LOST;
|
||||
}
|
||||
|
||||
static bool tcp_force_fast_retransmit(struct sock *sk)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
return after(tcp_highest_sack_seq(tp),
|
||||
tp->snd_una + tp->reordering * tp->mss_cache);
|
||||
}
|
||||
|
||||
/* Undo during fast recovery after partial ACK. */
|
||||
static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
|
||||
bool *do_lost)
|
||||
static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
|
|
@ -3025,9 +2916,6 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
|
|||
tcp_undo_cwnd_reduction(sk, true);
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
|
||||
tcp_try_keep_open(sk);
|
||||
} else {
|
||||
/* Partial ACK arrived. Force fast retransmit. */
|
||||
*do_lost = tcp_force_fast_retransmit(sk);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -3041,7 +2929,7 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
|
|||
|
||||
if (unlikely(tcp_is_reno(tp))) {
|
||||
tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
|
||||
} else if (tcp_is_rack(sk)) {
|
||||
} else {
|
||||
u32 prior_retrans = tp->retrans_out;
|
||||
|
||||
if (tcp_rack_mark_lost(sk))
|
||||
|
|
@ -3068,10 +2956,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
|
|||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
int fast_rexmit = 0, flag = *ack_flag;
|
||||
int flag = *ack_flag;
|
||||
bool ece_ack = flag & FLAG_ECE;
|
||||
bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
|
||||
tcp_force_fast_retransmit(sk));
|
||||
|
||||
if (!tp->packets_out && tp->sacked_out)
|
||||
tp->sacked_out = 0;
|
||||
|
|
@ -3120,7 +3006,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
|
|||
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
|
||||
if (tcp_is_reno(tp))
|
||||
tcp_add_reno_sack(sk, num_dupack, ece_ack);
|
||||
} else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
|
||||
} else if (tcp_try_undo_partial(sk, prior_snd_una))
|
||||
return;
|
||||
|
||||
if (tcp_try_undo_dsack(sk))
|
||||
|
|
@ -3175,11 +3061,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
|
|||
|
||||
/* Otherwise enter Recovery state */
|
||||
tcp_enter_recovery(sk, ece_ack);
|
||||
fast_rexmit = 1;
|
||||
}
|
||||
|
||||
if (!tcp_is_rack(sk) && do_lost)
|
||||
tcp_update_scoreboard(sk, fast_rexmit);
|
||||
*rexmit = REXMIT_LOST;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user