Merge branch 'mptcp-add-tcp_maxseg-sockopt-support'

Matthieu Baerts says:

====================
mptcp: add TCP_MAXSEG sockopt support

The TCP_MAXSEG socket option was not supported by MPTCP, mainly because
it has never been requested before. But there are still valid use-cases,
e.g. with HAProxy.

- Patch 1 is a small cleanup patch in the MPTCP sockopt file.

- Patch 2 exposes some code from TCP, to avoid duplicating it in MPTCP.

- Patch 3 adds TCP_MAXSEG sockopt support in MPTCP.

- Patch 4 is not related to the others, it fixes a typo in a comment.

Note that the new TCP_MAXSEG sockopt support has been validated by a new
packetdrill script on the MPTCP CI:

  https://github.com/multipath-tcp/packetdrill/pull/161

v1: https://lore.kernel.org/20250716-net-next-mptcp-tcp_maxseg-v1-0-548d3a5666f6@kernel.org
====================

Link: https://patch.msgid.link/20250719-net-next-mptcp-tcp_maxseg-v2-0-8c910fbc5307@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-07-21 17:48:36 -07:00
commit e8c24e23c4
5 changed files with 46 additions and 14 deletions

View File

@ -621,6 +621,7 @@ void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
int tcp_sock_set_user_timeout(struct sock *sk, int val);
int tcp_sock_set_maxseg(struct sock *sk, int val);
static inline bool dst_tcp_usec_ts(const struct dst_entry *dst)
{

View File

@ -3751,6 +3751,19 @@ int tcp_set_window_clamp(struct sock *sk, int val)
return 0;
}
/* Store the user-requested maximum segment size on a TCP socket.
 *
 * @sk:  socket whose rx_opt.user_mss is updated
 * @val: requested MSS; 0 is always accepted, any other value must lie
 *       within [TCP_MIN_MSS, MAX_TCP_WINDOW]
 *
 * Returns 0 on success, or -EINVAL (leaving the socket untouched) when
 * a non-zero @val is out of range.
 */
int tcp_sock_set_maxseg(struct sock *sk, int val)
{
	if (val) {
		/* No check against the interface MTU is done here: a value
		 * above it simply won't take effect, and the interface that
		 * will be used is typically not known yet at this point.
		 */
		if (val < TCP_MIN_MSS)
			return -EINVAL;
		if (val > MAX_TCP_WINDOW)
			return -EINVAL;
	}

	tcp_sk(sk)->rx_opt.user_mss = val;

	return 0;
}
/*
* Socket option code for TCP.
*/
@ -3883,15 +3896,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case TCP_MAXSEG:
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used
*/
if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
}
tp->rx_opt.user_mss = val;
err = tcp_sock_set_maxseg(sk, val);
break;
case TCP_NODELAY:

View File

@ -1387,7 +1387,7 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
* - estimate the faster flow linger time
* - use the above to estimate the amount of byte transferred
* by the faster flow
* - check that the amount of queued data is greter than the above,
* - check that the amount of queued data is greater than the above,
* otherwise do not use the picked, slower, subflow
* We select the subflow with the shorter estimated time to flush
* the queued mem, which basically ensure the above. We just need

View File

@ -326,6 +326,7 @@ struct mptcp_sock {
int keepalive_cnt;
int keepalive_idle;
int keepalive_intvl;
int maxseg;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;

View File

@ -798,6 +798,23 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
return ret;
}
/* Apply a socket option to every subflow of an MPTCP socket.
 *
 * Walks the subflows of @msk and forwards the option to each subflow's
 * TCP socket through tcp_setsockopt(). The walk stops at the first
 * subflow for which tcp_setsockopt() returns non-zero, and that value is
 * returned; subflows already visited are not reverted here.
 *
 * Returns 0 when every call succeeded, or when there was no subflow to
 * visit.
 */
static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
				   int optname, sockptr_t optval,
				   unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;

	mptcp_for_each_subflow(msk, subflow) {
		int err;

		err = tcp_setsockopt(mptcp_subflow_tcp_sock(subflow), level,
				     optname, optval, optlen);
		if (err)
			return err;
	}

	return 0;
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@ -859,6 +876,11 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
&msk->keepalive_cnt,
val);
break;
case TCP_MAXSEG:
msk->maxseg = val;
ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval,
optlen);
break;
default:
ret = -ENOPROTOOPT;
}
@ -914,10 +936,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
lock_sock(sk);
ssk = msk->first;
if (ssk) {
ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
goto out;
}
if (ssk)
goto get;
ssk = __mptcp_nmpc_sk(msk);
if (IS_ERR(ssk)) {
@ -925,6 +945,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
goto out;
}
get:
ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
out:
@ -1407,6 +1428,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
case TCP_IS_MPTCP:
return mptcp_put_int_option(msk, optval, optlen, 1);
case TCP_MAXSEG:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
}
return -EOPNOTSUPP;
}
@ -1553,6 +1577,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
tcp_sock_set_maxseg(ssk, msk->maxseg);
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));