Merge branch 'mptcp-make-add_addr-retransmission-timeout-adaptive'

Matthieu Baerts says:

====================
mptcp: make ADD_ADDR retransmission timeout adaptive

Currently, the MPTCP ADD_ADDR notifications are retransmitted after a
fixed timeout controlled by the net.mptcp.add_addr_timeout sysctl knob,
if the corresponding "echo" packets are not received before. This can be
too slow (or too quick), especially with a too cautious default value
set to 2 minutes.

- Patch 1: make ADD_ADDR retransmission timeout adaptive, using the
  TCP's retransmission timeout. The corresponding sysctl knob is now
  used as a maximum value.

- Patch 2: now that these ADD_ADDR retransmissions can happen faster,
  all MPTCP Join subtests checking ADD_ADDR counters accept more
  ADD_ADDR than expected (if any). This is aligned with the previous
  behaviour, when the ADD_ADDR RTO was lowered down to 1 second.

- Patch 3: Some CIs have reported that some MPTCP Join signalling tests
  were unstable. It seems that it is due to the time it can take in slow
  environments to send a bunch of ADD_ADDR notifications and wait each
  time for their echo reply. Use a longer transfer to avoid such errors.

v1: https://lore.kernel.org/d5397026-92eb-4a43-9534-954b43ab9305@kernel.org
====================

Link: https://patch.msgid.link/20250907-net-next-mptcp-add_addr-retrans-adapt-v1-0-824cc805772b@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-09-09 18:57:49 -07:00
commit b90c7ca4f9
3 changed files with 40 additions and 21 deletions

View File

@ -8,9 +8,11 @@ MPTCP Sysfs variables
===============================
add_addr_timeout - INTEGER (seconds)
Set the timeout after which an ADD_ADDR control message will be
resent to an MPTCP peer that has not acknowledged a previous
ADD_ADDR message.
Set the maximum value of timeout after which an ADD_ADDR control message
will be resent to an MPTCP peer that has not acknowledged a previous
ADD_ADDR message. A dynamically estimated retransmission timeout, based
on the estimated connection round-trip-time, is used instead when that
estimate is lower than this maximum.
Do not retransmit if set to 0.

View File

@ -268,6 +268,27 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
return -EINVAL;
}
/* Compute the delay to use when (re)arming the ADD_ADDR timer.
 *
 * The starting point is the value returned by mptcp_get_add_addr_timeout()
 * for the netns of @msk. The largest icsk_rto seen across the subflows of
 * @msk replaces it, but only when that largest value is non-zero and
 * strictly lower than the starting point.
 *
 * Returns the selected delay; callers add it to jiffies.
 */
static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk)
{
	const struct net *net = sock_net((struct sock *)msk);
	unsigned int limit = mptcp_get_add_addr_timeout(net);
	struct mptcp_subflow_context *subflow;
	unsigned int highest = 0;

	/* Track the highest RTO among all the subflows of this connection. */
	mptcp_for_each_subflow(msk, subflow) {
		unsigned int cur;

		cur = inet_csk(mptcp_subflow_tcp_sock(subflow))->icsk_rto;
		if (cur > highest)
			highest = cur;
	}

	/* No usable RTO found, or it is not below the configured value:
	 * keep the configured value.
	 */
	if (!highest || highest >= limit)
		return limit;

	return highest;
}
static void mptcp_pm_add_timer(struct timer_list *timer)
{
struct mptcp_pm_add_entry *entry = timer_container_of(entry, timer,
@ -292,7 +313,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
goto out;
}
timeout = mptcp_get_add_addr_timeout(sock_net(sk));
timeout = mptcp_adjust_add_addr_timeout(msk);
if (!timeout)
goto out;
@ -307,7 +328,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
sk_reset_timer(sk, timer,
jiffies + timeout);
jiffies + (timeout << entry->retrans_times));
spin_unlock_bh(&msk->pm.lock);
@ -348,7 +369,6 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
struct net *net = sock_net(sk);
unsigned int timeout;
lockdep_assert_held(&msk->pm.lock);
@ -374,7 +394,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
reset_timer:
timeout = mptcp_get_add_addr_timeout(net);
timeout = mptcp_adjust_add_addr_timeout(msk);
if (timeout)
sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);

View File

@ -358,6 +358,7 @@ reset_with_add_addr_timeout()
tables="${ip6tables}"
fi
# set a maximum, to avoid too long timeout with exponential backoff
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
@ -1669,7 +1670,6 @@ chk_add_nr()
local tx=""
local rx=""
local count
local timeout
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
@ -1678,15 +1678,13 @@ chk_add_nr()
rx=" server"
fi
timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)
print_check "add addr rx${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
# add addrs options, due to retransmissions
elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
# Tolerate more ADD_ADDR than expected (if any), due to retransmissions
elif [ "$count" != "$add_nr" ] &&
{ [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then
fail_test "got $count ADD_ADDR[s] expected $add_nr"
else
print_ok
@ -1774,18 +1772,15 @@ chk_add_tx_nr()
{
local add_tx_nr=$1
local echo_tx_nr=$2
local timeout
local count
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
print_check "add addr tx"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
# add addrs options, due to retransmissions
elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
# Tolerate more ADD_ADDR than expected (if any), due to retransmissions
elif [ "$count" != "$add_tx_nr" ] &&
{ [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr"
else
print_ok
@ -2273,7 +2268,8 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 3 3
fi
@ -2285,7 +2281,8 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
join_syn_tx=3 \
chk_join_nr 1 1 1
chk_add_nr 3 3