Merge branch 'mptcp-various-rare-sending-issues'

Matthieu Baerts says:

====================
mptcp: various rare sending issues

Here are various fixes from Paolo, addressing very occasional issues on
the sending side:

- Patch 1: drop an optimisation that could lead to timeout in case of
  race conditions. A fix for up to v5.11.

- Patch 2: fix stream corruption under very specific conditions.
  A fix for up to v5.13.

- Patch 3: restore MPTCP-level zero window probe after a recent fix.
  A fix for up to v5.16.

- Patch 4: new MIB counter to track MPTCP-level zero windows probe to
  help catching issues similar to the one fixed by the previous patch.
====================

Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-0-38ffff5a9ec8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-10-29 17:44:30 -07:00
commit ac345c5fff
4 changed files with 39 additions and 22 deletions

View File

@ -85,6 +85,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK),
SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE),
};
/* mptcp_mib_alloc - allocate percpu mib counters

View File

@ -88,6 +88,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */
MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */
MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */
MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */
__MPTCP_MIB_MAX
};

View File

@ -1007,7 +1007,7 @@ static void __mptcp_clean_una(struct sock *sk)
if (WARN_ON_ONCE(!msk->recovery))
break;
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
msk->first_pending = mptcp_send_next(sk);
}
dfrag_clear(sk, dfrag);
@ -1299,7 +1299,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
/* No need for zero probe if there are any data pending
* either at the msk or ssk level; skb is the current write
* queue tail and can be empty at this point.
*/
if (snd_una != msk->snd_nxt || skb->len ||
skb != tcp_send_head(ssk)) {
tcp_remove_empty_skb(ssk);
return 0;
}
@ -1350,6 +1355,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext->dsn64);
if (zero_window_probe) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
mpext->frozen = 1;
if (READ_ONCE(msk->csum_enabled))
@ -1552,7 +1558,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
msk->first_pending = mptcp_send_next(sk);
if (msk->snd_burst <= 0 ||
!sk_stream_memory_free(ssk) ||
@ -1912,7 +1918,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
get_page(dfrag->page);
list_add_tail(&dfrag->list, &msk->rtx_queue);
if (!msk->first_pending)
WRITE_ONCE(msk->first_pending, dfrag);
msk->first_pending = dfrag;
}
pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk,
dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
@ -1945,22 +1951,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
static int __mptcp_recvmsg_mskq(struct sock *sk,
struct msghdr *msg,
size_t len, int flags,
static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int copied_total,
struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb, *tmp;
int total_data_len = 0;
int copied = 0;
skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
u32 offset = MPTCP_SKB_CB(skb)->offset;
u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
u32 data_len = skb->len - offset;
u32 count = min_t(size_t, len - copied, data_len);
u32 count;
int err;
if (flags & MSG_PEEK) {
/* skip already peeked skbs */
if (total_data_len + data_len <= copied_total) {
total_data_len += data_len;
continue;
}
/* skip the already peeked data in the current skb */
delta = copied_total - total_data_len;
offset += delta;
data_len -= delta;
}
count = min_t(size_t, len - copied, data_len);
if (!(flags & MSG_TRUNC)) {
err = skb_copy_datagram_msg(skb, offset, msg, count);
if (unlikely(err < 0)) {
@ -1977,16 +1997,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
copied += count;
if (count < data_len) {
if (!(flags & MSG_PEEK)) {
if (!(flags & MSG_PEEK)) {
msk->bytes_consumed += count;
if (count < data_len) {
MPTCP_SKB_CB(skb)->offset += count;
MPTCP_SKB_CB(skb)->map_seq += count;
msk->bytes_consumed += count;
break;
}
break;
}
if (!(flags & MSG_PEEK)) {
/* avoid the indirect call, we know the destructor is sock_rfree */
skb->destructor = NULL;
skb->sk = NULL;
@ -1994,7 +2012,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
sk_mem_uncharge(sk, skb->truesize);
__skb_unlink(skb, &sk->sk_receive_queue);
skb_attempt_defer_free(skb);
msk->bytes_consumed += count;
}
if (copied >= len)
@ -2191,7 +2208,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
while (copied < len) {
int err, bytes_read;
bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
copied, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@ -2882,7 +2900,7 @@ static void __mptcp_clear_xmit(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
WRITE_ONCE(msk->first_pending, NULL);
msk->first_pending = NULL;
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
dfrag_clear(sk, dfrag);
}
@ -3422,9 +3440,6 @@ void __mptcp_data_acked(struct sock *sk)
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
{
if (!mptcp_send_head(sk))
return;
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk, false);
else

View File

@ -414,7 +414,7 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
{
const struct mptcp_sock *msk = mptcp_sk(sk);
return READ_ONCE(msk->first_pending);
return msk->first_pending;
}
static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)