Merge branch 'mptcp-misc-features-for-v6-18'

Matthieu Baerts says:

====================
mptcp: misc. features for v6.18

This series contains 4 patches covering 3 independent new features:

- Patch 1: use HMAC-SHA256 library instead of open-coded HMAC.

- Patch 2: selftests: check for unexpected fallback counter increments.

- Patches 3-4: record subflows in RPS table, for aRFS support.

v1: https://lore.kernel.org/20250901-net-next-mptcp-misc-feat-6-18-v1-0-80ae80d2b903@kernel.org
====================

Link: https://patch.msgid.link/20250902-net-next-mptcp-misc-feat-6-18-v2-0-fa02bb3188b1@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-09-03 15:08:22 -07:00
commit a229866f7d
4 changed files with 202 additions and 62 deletions

View File

@ -85,11 +85,8 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
WRITE_ONCE(table->ents[index], val);
}
#endif /* CONFIG_RPS */
static inline void sock_rps_record_flow_hash(__u32 hash)
static inline void _sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
struct rps_sock_flow_table *sock_flow_table;
if (!hash)
@ -99,42 +96,33 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
if (sock_flow_table)
rps_record_sock_flow(sock_flow_table, hash);
rcu_read_unlock();
#endif
}
static inline void sock_rps_record_flow(const struct sock *sk)
static inline void _sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
if (static_branch_unlikely(&rfs_needed)) {
/* Reading sk->sk_rxhash might incur an expensive cache line
* miss.
*
* TCP_ESTABLISHED does cover almost all states where RFS
* might be useful, and is cheaper [1] than testing :
* IPv4: inet_sk(sk)->inet_daddr
* IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
* OR an additional socket flag
* [1] : sk_state and sk_prot are in the same cache line.
/* Reading sk->sk_rxhash might incur an expensive cache line
* miss.
*
* TCP_ESTABLISHED does cover almost all states where RFS
* might be useful, and is cheaper [1] than testing :
* IPv4: inet_sk(sk)->inet_daddr
* IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
* OR an additional socket flag
* [1] : sk_state and sk_prot are in the same cache line.
*/
if (sk->sk_state == TCP_ESTABLISHED) {
/* This READ_ONCE() is paired with the WRITE_ONCE()
* from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
*/
if (sk->sk_state == TCP_ESTABLISHED) {
/* This READ_ONCE() is paired with the WRITE_ONCE()
* from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
*/
sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
}
_sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
}
#endif
}
static inline void sock_rps_delete_flow(const struct sock *sk)
static inline void _sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
struct rps_sock_flow_table *table;
u32 hash, index;
if (!static_branch_unlikely(&rfs_needed))
return;
hash = READ_ONCE(sk->sk_rxhash);
if (!hash)
return;
@ -147,6 +135,45 @@ static inline void sock_rps_delete_flow(const struct sock *sk)
WRITE_ONCE(table->ents[index], RPS_NO_CPU);
}
rcu_read_unlock();
}
#endif /* CONFIG_RPS */
/* Report the state of the 'rfs_needed' static key.
 *
 * Kernels built without CONFIG_RPS always get 'false', so callers can use
 * this helper unconditionally.
 */
static inline bool rfs_is_needed(void)
{
#ifndef CONFIG_RPS
	return false;
#else
	return static_branch_unlikely(&rfs_needed);
#endif
}
/* Guarded entry point for recording @hash: the work is handed over to
 * _sock_rps_record_flow_hash() only when rfs_is_needed() says so.
 * Compiles to an empty function without CONFIG_RPS.
 */
static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
	if (rfs_is_needed())
		_sock_rps_record_flow_hash(hash);
#endif
}
/* Guarded entry point for recording the flow of @sk: the work is handed
 * over to _sock_rps_record_flow() only when rfs_is_needed() says so.
 * Compiles to an empty function without CONFIG_RPS.
 */
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (rfs_is_needed())
		_sock_rps_record_flow(sk);
#endif
}
/* Guarded entry point for deleting the flow of @sk: the work is handed
 * over to _sock_rps_delete_flow() only when rfs_is_needed() says so.
 * Compiles to an empty function without CONFIG_RPS.
 */
static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (rfs_is_needed())
		_sock_rps_delete_flow(sk);
#endif
}

View File

@ -22,7 +22,6 @@
#include <linux/kernel.h>
#include <crypto/sha2.h>
#include <linux/unaligned.h>
#include "protocol.h"
@ -43,39 +42,9 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
{
u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
u8 key1be[8];
u8 key2be[8];
int i;
__be64 key[2] = { cpu_to_be64(key1), cpu_to_be64(key2) };
if (WARN_ON_ONCE(len > SHA256_DIGEST_SIZE))
len = SHA256_DIGEST_SIZE;
put_unaligned_be64(key1, key1be);
put_unaligned_be64(key2, key2be);
/* Generate key xored with ipad */
memset(input, 0x36, SHA256_BLOCK_SIZE);
for (i = 0; i < 8; i++)
input[i] ^= key1be[i];
for (i = 0; i < 8; i++)
input[i + 8] ^= key2be[i];
memcpy(&input[SHA256_BLOCK_SIZE], msg, len);
/* emit sha256(K1 || msg) on the second input block, so we can
* reuse 'input' for the last hashing
*/
sha256(input, SHA256_BLOCK_SIZE + len, &input[SHA256_BLOCK_SIZE]);
/* Prepare second part of hmac */
memset(input, 0x5C, SHA256_BLOCK_SIZE);
for (i = 0; i < 8; i++)
input[i] ^= key1be[i];
for (i = 0; i < 8; i++)
input[i + 8] ^= key2be[i];
sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac);
hmac_sha256_usingrawkey((const u8 *)key, sizeof(key), msg, len, hmac);
}
#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST)

View File

@ -12,6 +12,7 @@
#include <linux/sched/signal.h>
#include <linux/atomic.h>
#include <net/aligned_data.h>
#include <net/rps.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@ -1740,6 +1741,20 @@ static u32 mptcp_send_limit(const struct sock *sk)
return limit - not_sent;
}
static void mptcp_rps_record_subflows(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
if (!rfs_is_needed())
return;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
sock_rps_record_flow(ssk);
}
}
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@ -1753,6 +1768,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
mptcp_rps_record_subflows(msk);
if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
@ -2131,6 +2148,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out_err;
}
mptcp_rps_record_subflows(msk);
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
len = min_t(size_t, len, INT_MAX);
@ -3922,6 +3941,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
mptcp_sock_graft(ssk, newsock);
}
mptcp_rps_record_subflows(msk);
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/

View File

@ -74,6 +74,17 @@ unset join_create_err
unset join_bind_err
unset join_connect_err
unset fb_ns1
unset fb_ns2
unset fb_infinite_map_tx
unset fb_dss_corruption
unset fb_simult_conn
unset fb_mpc_passive
unset fb_mpc_active
unset fb_mpc_data
unset fb_md5_sig
unset fb_dss
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
@ -1399,6 +1410,115 @@ chk_join_tx_nr()
print_results "join Tx" ${rc}
}
# Compare the fallback counters of one netns with their expected values.
#
# $1: name of the variable holding the netns (e.g. "ns1"). It is dereferenced
#     with ${!ns} to read the counters and printed as-is in the messages.
#
# Expected values are taken from the fb_* variables, each defaulting to 0.
# An empty counter value sets the result to KSFT_SKIP; a mismatch sets it to
# KSFT_FAIL and reports it through print_check + fail_test. The status set by
# the last counter that was empty or mismatching is the one returned.
chk_fallback_nr()
{
	local ns=$1
	local rc=${KSFT_PASS}
	local count
	local idx

	# The three arrays below are parallel: counter name, label used in the
	# messages, expected value.
	local fb_mib=(
		"MPTcpExtInfiniteMapTx"
		"MPTcpExtDSSCorruptionFallback"
		"MPTcpExtSimultConnectFallback"
		"MPTcpExtMPCapableFallbackACK"
		"MPTcpExtMPCapableFallbackSYNACK"
		"MPTcpExtMPCapableDataFallback"
		"MPTcpExtMD5SigFallback"
		"MPTcpExtDssFallback"
	)
	local fb_label=(
		"infinite map tx"
		"dss corruption"
		"simult conn"
		"mpc passive"
		"mpc active"
		"mpc data"
		"MD5 Sig"
		"dss"
	)
	local fb_want=(
		"${fb_infinite_map_tx:-0}"
		"${fb_dss_corruption:-0}"
		"${fb_simult_conn:-0}"
		"${fb_mpc_passive:-0}"
		"${fb_mpc_active:-0}"
		"${fb_mpc_data:-0}"
		"${fb_md5_sig:-0}"
		"${fb_dss:-0}"
	)

	for idx in "${!fb_mib[@]}"; do
		count=$(mptcp_lib_get_counter ${!ns} "${fb_mib[idx]}")

		if [ -z "$count" ]; then
			rc=${KSFT_SKIP}
			continue
		fi

		if [ "$count" = "${fb_want[idx]}" ]; then
			continue
		fi

		rc=${KSFT_FAIL}
		print_check "$ns ${fb_label[idx]} fallback"
		fail_test "got $count ${fb_label[idx]} fallback[s] in $ns expected ${fb_want[idx]}"
	done

	return $rc
}
# Run chk_fallback_nr for both "ns1" and "ns2".
#
# If fb_ns1 / fb_ns2 is set, its content (e.g. "fb_dss=1") is placed in front
# of the chk_fallback_nr call through eval, so the fb_* expectations it
# carries apply to that call only. Results are printed under "fallback" only
# when at least one check did not return KSFT_PASS; the status printed is the
# one from the last netns that did not pass.
chk_fallback_nr_all()
{
	local netns=("ns1" "ns2")
	local fb_ns=("fb_ns1" "fb_ns2")
	local rc=${KSFT_PASS}
	# Keep the loop index local: without this, 'i' would be written in the
	# caller's scope and could clobber a loop counter there.
	local i

	for i in 0 1; do
		if [ -n "${!fb_ns[i]}" ]; then
			eval "${!fb_ns[i]}" \
				chk_fallback_nr ${netns[i]} || rc=${?}
		else
			chk_fallback_nr ${netns[i]} || rc=${?}
		fi
	done

	if [ "${rc}" != "${KSFT_PASS}" ]; then
		print_results "fallback" ${rc}
	fi
}
chk_join_nr()
{
local syn_nr=$1
@ -1484,6 +1604,8 @@ chk_join_nr()
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
chk_join_tx_nr
chk_fallback_nr_all
if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
@ -3337,6 +3459,7 @@ fail_tests()
join_csum_ns1=+1 join_csum_ns2=+0 \
join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
join_corrupted_pkts="$(pedit_action_pkts)" \
fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \
chk_join_nr 0 0 0
chk_fail_nr 1 -1 invert
fi