Merge branch 'mptcp-misc-fixes-for-v7-0-rc2'

Matthieu Baerts says:

====================
mptcp: misc fixes for v7.0-rc2

Here are various unrelated fixes:

- Patch 1: avoid bufferbloat in simult_flows selftest which can cause
  instabilities. A fix for v5.10.

- Patches 2-3: reduce RM_ADDR lost by not sending it over the same
  subflow as the one being removed, if possible. A fix for v5.13.

- Patches 4-5: avoid a WARN when using signal + subflow endpoints with a
  subflow limit of 0, and removing such endpoints during an active
  connection. A fix for v5.17.
====================

Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-0-4b5462b6f016@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2026-03-04 18:21:15 -08:00
commit ff2c591625
4 changed files with 108 additions and 16 deletions

View File

@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
}
void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow,
const struct mptcp_rm_list *rm_list)
{
struct mptcp_subflow_context *subflow, *alt = NULL;
u8 i, id = subflow_get_local_id(subflow);
for (i = 0; i < rm_list->nr; i++) {
if (rm_list->ids[i] == id)
return true;
}
return false;
}
static void
mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list)
{
struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@ -224,19 +239,35 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
return;
mptcp_for_each_subflow(msk, subflow) {
if (__mptcp_subflow_active(subflow)) {
if (!subflow->stale) {
mptcp_pm_send_ack(msk, subflow, false, false);
return;
}
if (!__mptcp_subflow_active(subflow))
continue;
if (!alt)
alt = subflow;
if (unlikely(subflow->stale)) {
if (!stale)
stale = subflow;
} else if (unlikely(rm_list &&
subflow_in_rm_list(subflow, rm_list))) {
if (!same_id)
same_id = subflow;
} else {
goto send_ack;
}
}
if (alt)
mptcp_pm_send_ack(msk, alt, false, false);
if (same_id)
subflow = same_id;
else if (stale)
subflow = stale;
else
return;
send_ack:
mptcp_pm_send_ack(msk, subflow, false, false);
}
void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
{
mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
}
int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
@ -470,7 +501,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
msk->pm.rm_list_tx = *rm_list;
rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
WRITE_ONCE(msk->pm.addr_signal, rm_addr);
mptcp_pm_addr_send_ack(msk);
mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
return 0;
}

View File

@ -418,6 +418,15 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
}
exit:
/* If an endpoint has both the signal and subflow flags, but it is not
* possible to create subflows -- the 'while' loop body above never
* executed -- then still mark the endp as used, which is somehow the
* case. This avoids issues later when removing the endpoint and calling
* __mark_subflow_endp_available(), which expects the increment here.
*/
if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id)
msk->pm.local_addr_used++;
mptcp_pm_nl_check_work_pending(msk);
}

View File

@ -104,6 +104,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
6 0 0 65535,
6 0 0 0"
# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption
# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c &&
# (ip[66] & 0xf0) == 0x40'"
CBPF_MPTCP_SUBOPTION_RM_ADDR="13,
48 0 0 0,
84 0 0 240,
21 0 9 64,
48 0 0 32,
84 0 0 240,
21 0 6 192,
48 0 0 53,
21 0 4 12,
48 0 0 66,
84 0 0 240,
21 0 1 64,
6 0 0 65535,
6 0 0 0"
init_partial()
{
capout=$(mktemp)
@ -2608,6 +2626,19 @@ remove_tests()
chk_rst_nr 0 0
fi
# signal+subflow with limits, remove
if reset "remove signal+subflow with limits"; then
pm_nl_set_limits $ns1 0 0
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow
pm_nl_set_limits $ns2 0 0
addr_nr_ns1=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
chk_add_nr 1 1
chk_rm_nr 1 0 invert
chk_rst_nr 0 0
fi
# addresses remove
if reset "remove addresses"; then
pm_nl_set_limits $ns1 3 3
@ -4217,6 +4248,14 @@ endpoint_tests()
chk_subflow_nr "after no reject" 3
chk_mptcp_info subflows 2 subflows 2
# To make sure RM_ADDR are sent over a different subflow, but
# allow the rest to quickly and cleanly close the subflow
local ipt=1
ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \
-p tcp -m tcp --tcp-option 30 \
-m bpf --bytecode \
"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
-j DROP || ipt=0
local i
for i in $(seq 3); do
pm_nl_del_endpoint $ns2 1 10.0.1.2
@ -4229,6 +4268,7 @@ endpoint_tests()
chk_subflow_nr "after re-add id 0 ($i)" 3
chk_mptcp_info subflows 3 subflows 3
done
[ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1
mptcp_lib_kill_group_wait $tests_pid
@ -4288,11 +4328,20 @@ endpoint_tests()
chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
# To make sure RM_ADDR are sent over a different subflow, but
# allow the rest to quickly and cleanly close the subflow
local ipt=1
ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \
-p tcp -m tcp --tcp-option 30 \
-m bpf --bytecode \
"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
-j DROP || ipt=0
pm_nl_del_endpoint $ns1 42 10.0.1.1
sleep 0.5
chk_subflow_nr "after delete ID 0" 2
chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
[ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
wait_mpj 4

View File

@ -237,10 +237,13 @@ run_test()
for dev in ns2eth1 ns2eth2; do
tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
done
tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
# keep the queued pkts number low, or the RTT estimator will see
# increasing latency over time.
tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50
tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50
tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50
tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50
# time is measured in ms, account for transfer size, aggregated link speed
# and header overhead (10%)