Merge branch 'mptcp-pm-misc-fixes-for-v7-1-rc3'

Matthieu Baerts says:

====================
mptcp: pm: misc. fixes for v7.1-rc3

Here are various fixes, mainly related to ADD_ADDRs:

- Patch 1: save ADD_ADDR for rtx with ID0 when needed. A fix for v6.1.

- Patch 2: remove unneeded exception for ID 0. A fix for v5.10.

- Patches 3-5: fix potential data-race and leaks during ADD_ADDR rtx. A
  fix for v5.10.

- Patch 6: resched blocked ADD_ADDR rtx after a more appropriate
  timeout, not after 15 seconds. A fix for v5.10.

- Patch 7: skip inactive subflows when looking at the max RTO. A
  fix for v6.18.

- Patch 8: avoid iterating over all subflows when there is no need to. A
  fix for v6.18.

- Patch 9: skip closed subflows when looking at sending MP_PRIO. A fix
  for v5.17.

- Patch 10: properly catch errors when using check_output() in the
  selftests. A fix for v6.9.

- Patch 11: skip the 'unknown' flag test when 'ip mptcp' is used. A fix
  for v6.10.
====================

Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-0-fca8091060a4@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-05-06 18:16:49 -07:00
commit 2b1f48cc0f
4 changed files with 73 additions and 38 deletions

View File

@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
u8 retrans_times;
bool timer_done;
struct timer_list add_timer;
struct mptcp_sock *sock;
struct rcu_head rcu;
@ -283,6 +284,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct mptcp_addr_info local, remote;
if (!__mptcp_subflow_active(subflow))
continue;
mptcp_local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
@ -305,18 +309,31 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk)
const struct net *net = sock_net((struct sock *)msk);
unsigned int rto = mptcp_get_add_addr_timeout(net);
struct mptcp_subflow_context *subflow;
unsigned int max = 0;
unsigned int max = 0, max_stale = 0;
if (!rto)
return 0;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct inet_connection_sock *icsk = inet_csk(ssk);
if (icsk->icsk_rto > max)
if (!__mptcp_subflow_active(subflow))
continue;
if (unlikely(subflow->stale)) {
if (icsk->icsk_rto > max_stale)
max_stale = icsk->icsk_rto;
} else if (icsk->icsk_rto > max) {
max = icsk->icsk_rto;
}
}
if (max && max < rto)
rto = max;
if (max)
return min(max, rto);
if (max_stale)
return min(max_stale, rto);
return rto;
}
@ -327,26 +344,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
add_timer);
struct mptcp_sock *msk = entry->sock;
struct sock *sk = (struct sock *)msk;
unsigned int timeout;
unsigned int timeout = 0;
pr_debug("msk=%p\n", msk);
if (!msk)
return;
bh_lock_sock(sk);
if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
goto out;
if (inet_sk_state_load(sk) == TCP_CLOSE)
return;
if (!entry->addr.id)
return;
if (mptcp_pm_should_add_signal_addr(msk)) {
sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
if (sock_owned_by_user(sk)) {
/* Try again later. */
timeout = HZ / 20;
goto out;
}
timeout = mptcp_adjust_add_addr_timeout(msk);
if (!timeout)
if (!timeout || mptcp_pm_should_add_signal_addr(msk))
goto out;
spin_lock_bh(&msk->pm.lock);
@ -359,8 +372,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
}
if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
sk_reset_timer(sk, timer,
jiffies + (timeout << entry->retrans_times));
timeout <<= entry->retrans_times;
else
timeout = 0;
spin_unlock_bh(&msk->pm.lock);
@ -368,7 +382,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
mptcp_pm_subflow_established(msk);
out:
__sock_put(sk);
if (timeout)
sk_reset_timer(sk, timer, jiffies + timeout);
else
/* if sock_put calls sk_free: avoid waiting for this timer */
entry->timer_done = true;
bh_unlock_sock(sk);
sock_put(sk);
}
struct mptcp_pm_add_entry *
@ -431,6 +451,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
reset_timer:
add_entry->timer_done = false;
timeout = mptcp_adjust_add_addr_timeout(msk);
if (timeout)
sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
@ -451,7 +472,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
list_for_each_entry_safe(entry, tmp, &free_list, list) {
sk_stop_timer_sync(sk, &entry->add_timer);
if (!entry->timer_done)
sk_stop_timer_sync(sk, &entry->add_timer);
kfree_rcu(entry, rcu);
}
}

View File

@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */
if (msk->pm.add_addr_signaled < endp_signal_max) {
u8 endp_id;
/* due to racing events on both ends we can reach here while
* previous add address is still running: if we invoke now
* mptcp_pm_announce_addr(), that will fail and the
@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (!select_signal_address(pernet, msk, &local))
goto subflow;
/* Special case for ID0: set the correct ID */
endp_id = local.addr.id;
if (endp_id == msk->mpc_endpoint_id)
local.addr.id = 0;
/* If the alloc fails, we are on memory pressure, not worth
* continuing, and trying to create subflows.
*/
if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
return;
__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
__clear_bit(endp_id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
/* Special case for ID0: set the correct ID */
if (local.addr.id == msk->mpc_endpoint_id)
local.addr.id = 0;
mptcp_pm_announce_addr(msk, &local.addr, false);
mptcp_pm_addr_send_ack(msk);

View File

@ -474,20 +474,24 @@ mptcp_lib_wait_local_port_listen() {
wait_local_port_listen "${@}" "tcp"
}
# $1: error file, $2: cmd, $3: expected msg, [$4: expected error]
mptcp_lib_check_output() {
local err="${1}"
local cmd="${2}"
local expected="${3}"
local exp_error="${4:-0}"
local cmd_ret=0
local out
if ! out=$(${cmd} 2>"${err}"); then
cmd_ret=${?}
fi
out=$(${cmd} 2>"${err}") || cmd_ret=1
if [ ${cmd_ret} -ne 0 ]; then
mptcp_lib_pr_fail "command execution '${cmd}' stderr"
cat "${err}"
if [ "${cmd_ret}" != "${exp_error}" ]; then
mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:"
if [ "${exp_error}" = 0 ]; then
cat "${err}"
else
echo "${out}"
fi
return 2
elif [ "${out}" = "${expected}" ]; then
return 0

View File

@ -122,10 +122,12 @@ check()
local cmd="$1"
local expected="$2"
local msg="$3"
local exp_error="$4"
local rc=0
mptcp_lib_print_title "$msg"
mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" ||
rc=${?}
if [ ${rc} -eq 2 ]; then
mptcp_lib_result_fail "${msg} # error ${rc}"
ret=${KSFT_FAIL}
@ -158,13 +160,13 @@ check "show_endpoints" \
"3,10.0.1.3,signal backup")" "dump addrs"
del_endpoint 2
check "get_endpoint 2" "" "simple del addr"
check "get_endpoint 2" "" "simple del addr" 1
check "show_endpoints" \
"$(format_endpoints "1,10.0.1.1" \
"3,10.0.1.3,signal backup")" "dump addrs after del"
add_endpoint 10.0.1.3 2>/dev/null
check "get_endpoint 4" "" "duplicate addr"
check "get_endpoint 4" "" "duplicate addr" 1
add_endpoint 10.0.1.4 flags signal
check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
@ -173,7 +175,7 @@ for i in $(seq 5 9); do
add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
done
check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
check "get_endpoint 10" "" "above hard addr limit"
check "get_endpoint 10" "" "above hard addr limit" 1
del_endpoint 9
for i in $(seq 10 255); do
@ -192,9 +194,13 @@ check "show_endpoints" \
flush_endpoint
check "show_endpoints" "" "flush addrs"
add_endpoint 10.0.1.1 flags unknown
check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
flush_endpoint
# "unknown" flag is only supported by pm_nl_ctl
if ! mptcp_lib_is_ip_mptcp; then
add_endpoint 10.0.1.1 flags unknown
check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \
"ignore unknown flags"
flush_endpoint
fi
set_limits 9 1 2>/dev/null
check "get_limits" "${default_limits}" "rcv addrs above hard limit"