From edd669057c56966b598a464d6e8c9fc0122a1b1c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 19 Jul 2025 00:06:56 +0200 Subject: [PATCH 1/4] mptcp: sockopt: drop redundant tcp_getsockopt tcp_getsockopt() is called twice in mptcp_getsockopt_first_sf_only() in different conditions, which makes the code a bit redundant. The first call to tcp_getsockopt() when the first subflow exists can be replaced by going to a new label "get" before the second call. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250719-net-next-mptcp-tcp_maxseg-v2-1-8c910fbc5307@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 3caa0a9d3b38..afa54fba51e2 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -914,10 +914,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int lock_sock(sk); ssk = msk->first; - if (ssk) { - ret = tcp_getsockopt(ssk, level, optname, optval, optlen); - goto out; - } + if (ssk) + goto get; ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { @@ -925,6 +923,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int goto out; } +get: ret = tcp_getsockopt(ssk, level, optname, optval, optlen); out: From 51a62199a8aaac0d1645b1dd8e670a6f35aead81 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 19 Jul 2025 00:06:57 +0200 Subject: [PATCH 2/4] tcp: add tcp_sock_set_maxseg Add a helper tcp_sock_set_maxseg() to directly set the TCP_MAXSEG sockopt from kernel space. This new helper will be used in the following patch from MPTCP. Signed-off-by: Geliang Tang Acked-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250719-net-next-mptcp-tcp_maxseg-v2-2-8c910fbc5307@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 1 + net/ipv4/tcp.c | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1a5737b3753d..57e478bfaef2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -621,6 +621,7 @@ void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); int tcp_sock_set_user_timeout(struct sock *sk, int val); +int tcp_sock_set_maxseg(struct sock *sk, int val); static inline bool dst_tcp_usec_ts(const struct dst_entry *dst) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 31149a0ac849..71a956fbfc55 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3751,6 +3751,19 @@ int tcp_set_window_clamp(struct sock *sk, int val) return 0; } +int tcp_sock_set_maxseg(struct sock *sk, int val) +{ + /* Values greater than interface MTU won't take effect. However + * at the point when this call is done we typically don't yet + * know which interface is going to be used + */ + if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) + return -EINVAL; + + tcp_sk(sk)->rx_opt.user_mss = val; + return 0; +} + /* * Socket option code for TCP. */ @@ -3883,15 +3896,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case TCP_MAXSEG: - /* Values greater than interface MTU won't take effect. However - * at the point when this call is done we typically don't yet - * know which interface is going to be used - */ - if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) { - err = -EINVAL; - break; - } - tp->rx_opt.user_mss = val; + err = tcp_sock_set_maxseg(sk, val); break; case TCP_NODELAY: From 51c5fd09e1b457eed103a22893603fb83a818162 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 19 Jul 2025 00:06:58 +0200 Subject: [PATCH 3/4] mptcp: add TCP_MAXSEG sockopt support The TCP_MAXSEG socket option is currently not supported by MPTCP, mainly because it has never been requested before. But there are still valid use-cases, e.g. with HAProxy. This patch adds its support in MPTCP by propagating the value to all subflows. The get part looks at the value on the first subflow, to be as closed as possible to TCP. Only one value can be returned for the cached MSS, so this can come only from one subflow. Similar to mptcp_setsockopt_first_sf_only(), a generic helper mptcp_setsockopt_all_subflows() is added to set sockopt for each subflows of the mptcp socket. Add a new member for struct mptcp_sock to store the TCP_MAXSEG value, and return this value in getsockopt. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/515 Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250719-net-next-mptcp-tcp_maxseg-v2-3-8c910fbc5307@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.h | 1 + net/mptcp/sockopt.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6ec245fd2778..1a32edf6f343 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -326,6 +326,7 @@ struct mptcp_sock { int keepalive_cnt; int keepalive_idle; int keepalive_intvl; + int maxseg; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index afa54fba51e2..2c267aff95be 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -798,6 +798,23 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int return ret; } +static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, + int optname, sockptr_t optval, + unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + int ret = 0; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); + if (ret) + break; + } + return ret; +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -859,6 +876,11 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, &msk->keepalive_cnt, val); break; + case TCP_MAXSEG: + msk->maxseg = val; + ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval, + optlen); + break; default: ret = -ENOPROTOOPT; } @@ -1406,6 +1428,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); case TCP_IS_MPTCP: return mptcp_put_int_option(msk, optval, optlen, 1); + case TCP_MAXSEG: + return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, + optval, optlen); } return -EOPNOTSUPP; } @@ -1552,6 +1577,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); + tcp_sock_set_maxseg(ssk, msk->maxseg); inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); From 154e56a77d81ad49eb979aa64463d5fcfad51136 Mon Sep 17 00:00:00 2001 From: moyuanhao Date: Sat, 19 Jul 2025 00:06:59 +0200 Subject: [PATCH 4/4] mptcp: fix typo in a comment This patch fixes the follow spelling mistake in a comment: greter -> greater Signed-off-by: moyuanhao Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250719-net-next-mptcp-tcp_maxseg-v2-4-8c910fbc5307@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2ad1c41e963e..6c448a0be949 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1387,7 +1387,7 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) * - estimate the faster flow linger time * - use the above to estimate the amount of byte transferred * by the faster flow - * - check that the amount of queued data is greter than the above, + * - check that the amount of queued data is greater than the above, * otherwise do not use the picked, slower, subflow * We select the subflow with the shorter estimated time to flush * the queued mem, which basically ensure the above. We just need