Merge branch 'mptcp-pm-nl-announce-deny-join-id0-flag'

Matthieu Baerts says:

====================
mptcp: pm: nl: announce deny-join-id0 flag

During the connection establishment, a peer can tell the other one that
it cannot establish new subflows to the initial IP address and port by
setting the 'C' flag [1]. Doing so makes sense when the sender is behind
a strict NAT, operating behind a legacy Layer 4 load balancer, or using
anycast IP address for example.

When this 'C' flag is set, the path-managers must then not try to
establish new subflows to the other peer's initial IP address and port.
The in-kernel PM has access to this info, but the userspace PM didn't,
not letting the userspace daemon able to respect the RFC8684.

Here are a few fixes related to this 'C' flag (aka 'deny-join-id0'):

- Patch 1: add remote_deny_join_id0 info on passive connections. A fix
  for v5.14.

- Patch 2: let the userspace PM daemon know about the deny_join_id0
  attribute, so when set, it can avoid creating new subflows to the
  initial IP address and port. A fix for v5.19.

- Patch 3: a validation for the previous commit.

- Patch 4: record the deny_join_id0 info when TFO is used. A fix for
  v6.2.

- Patch 5: not related to deny-join-id0, but it fixes errors messages in
  the sockopt selftests, not to create confusions. A fix for v6.5.
====================

Link: https://patch.msgid.link/20250912-net-mptcp-pm-uspace-deny_join_id0-v1-0-40171884ade8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-09-15 18:12:08 -07:00
commit 97499e2818
9 changed files with 48 additions and 16 deletions

View File

@ -28,13 +28,13 @@ definitions:
traffic-patterns it can take a long time until the
MPTCP_EVENT_ESTABLISHED is sent.
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
dport, server-side.
dport, server-side, [flags].
-
name: established
doc: >-
A MPTCP connection is established (can start new subflows).
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
dport, server-side.
dport, server-side, [flags].
-
name: closed
doc: >-

View File

@ -31,6 +31,8 @@
#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0)
#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1)
#define MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 _BITUL(0)
#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)

View File

@ -16,10 +16,10 @@
* good time to allocate memory and send ADD_ADDR if needed. Depending on the
* traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED
* is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, server-side.
* sport, dport, server-side, [flags].
* @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new
* subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, server-side.
* sport, dport, server-side, [flags].
* @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token.
* @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer.
* Attributes: token, rem_id, family, daddr4 | daddr6 [, dport].

View File

@ -985,13 +985,13 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
set_fully_established:
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
mptcp_data_lock((struct sock *)msk);
__mptcp_subflow_fully_established(msk, subflow, mp_opt);
mptcp_data_unlock((struct sock *)msk);

View File

@ -408,6 +408,7 @@ static int mptcp_event_created(struct sk_buff *skb,
const struct sock *ssk)
{
int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
u16 flags = 0;
if (err)
return err;
@ -415,6 +416,12 @@ static int mptcp_event_created(struct sk_buff *skb,
if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
return -EMSGSIZE;
if (READ_ONCE(msk->pm.remote_deny_join_id0))
flags |= MPTCP_PM_EV_FLAG_DENY_JOIN_ID0;
if (flags && nla_put_u16(skb, MPTCP_ATTR_FLAGS, flags))
return -EMSGSIZE;
return mptcp_event_add_subflow(skb, ssk);
}

View File

@ -883,6 +883,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
ctx->subflow_id = 1;
owner = mptcp_sk(ctx->conn);
if (mp_opt.deny_join_id0)
WRITE_ONCE(owner->pm.remote_deny_join_id0, true);
mptcp_pm_new_connection(owner, child, 1);
/* with OoO packets we can reach here without ingress

View File

@ -667,22 +667,26 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64,
s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1));
/* be nice when running on top of older kernel */
if (s.pkt_stats_avail) {
if (s.last_sample.mptcpi_bytes_sent != ret2)
xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64
", diff %" PRId64,
s.last_sample.mptcpi_bytes_sent, ret2,
s.last_sample.mptcpi_bytes_sent - ret2);
if (s.last_sample.mptcpi_bytes_received != ret)
xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64
", diff %" PRId64,
s.last_sample.mptcpi_bytes_received, ret,
s.last_sample.mptcpi_bytes_received - ret);
if (s.last_sample.mptcpi_bytes_acked != ret)
xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
s.last_sample.mptcpi_bytes_acked, ret2,
s.last_sample.mptcpi_bytes_acked - ret2);
xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64
", diff %" PRId64,
s.last_sample.mptcpi_bytes_acked, ret,
s.last_sample.mptcpi_bytes_acked - ret);
}
close(fd);

View File

@ -188,6 +188,13 @@ static int capture_events(int fd, int event_group)
fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
else if (attrs->rta_type == MPTCP_ATTR_FLAGS) {
__u16 flags = *(__u16 *)RTA_DATA(attrs);
/* only print when present, easier */
if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0)
fprintf(stderr, ",deny_join_id0:1");
}
attrs = RTA_NEXT(attrs, msg_len);
}

View File

@ -201,6 +201,9 @@ make_connection()
is_v6="v4"
fi
# set this on the client side only: will not affect the rest
ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0
:>"$client_evts"
:>"$server_evts"
@ -223,23 +226,28 @@ make_connection()
local client_token
local client_port
local client_serverside
local client_nojoin
local server_token
local server_serverside
local server_nojoin
client_token=$(mptcp_lib_evts_get_info token "$client_evts")
client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts")
server_token=$(mptcp_lib_evts_get_info token "$server_evts")
server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts")
print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
[ "$server_serverside" = 1 ]
if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] &&
[ "${client_serverside}" = 0 ] && [ "${server_serverside}" = 1 ] &&
[ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ]
then
test_pass
print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
else
test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})"
mptcp_lib_result_print_all_tap
exit ${KSFT_FAIL}
fi