mirror of
https://github.com/torvalds/linux.git
synced 2026-06-03 03:53:37 +02:00
Merge branch 'mptcp-misc-features-for-v6-18'
Matthieu Baerts says:

====================
mptcp: misc. features for v6.18

This series contains 4 independent new features:

- Patch 1: use HMAC-SHA256 library instead of open-coded HMAC.
- Patch 2: selftests: check for unexpected fallback counter increments.
- Patches 3-4: record subflows in RPS table, for aRFS support.

v1: https://lore.kernel.org/20250901-net-next-mptcp-misc-feat-6-18-v1-0-80ae80d2b903@kernel.org
====================

Link: https://patch.msgid.link/20250902-net-next-mptcp-misc-feat-6-18-v2-0-fa02bb3188b1@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
a229866f7d
|
|
@ -85,11 +85,8 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
|
|||
WRITE_ONCE(table->ents[index], val);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_RPS */
|
||||
|
||||
static inline void sock_rps_record_flow_hash(__u32 hash)
|
||||
static inline void _sock_rps_record_flow_hash(__u32 hash)
|
||||
{
|
||||
#ifdef CONFIG_RPS
|
||||
struct rps_sock_flow_table *sock_flow_table;
|
||||
|
||||
if (!hash)
|
||||
|
|
@ -99,42 +96,33 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
|
|||
if (sock_flow_table)
|
||||
rps_record_sock_flow(sock_flow_table, hash);
|
||||
rcu_read_unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Record the flow of @sk from its saved sk_rxhash, but only while the
 * socket is in TCP_ESTABLISHED state.
 *
 * This helper does not test the rfs_needed static branch itself; the
 * sock_rps_record_flow() wrapper checks rfs_is_needed() before calling it.
 */
static inline void _sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	/* Reading sk->sk_rxhash might incur an expensive cache line
	 * miss.
	 *
	 * TCP_ESTABLISHED does cover almost all states where RFS
	 * might be useful, and is cheaper [1] than testing :
	 *	IPv4: inet_sk(sk)->inet_daddr
	 *	IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
	 *	OR an additional socket flag
	 * [1] : sk_state and sk_prot are in the same cache line.
	 */
	if (sk->sk_state == TCP_ESTABLISHED) {
		/* This READ_ONCE() is paired with the WRITE_ONCE()
		 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
		 */
		_sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
	}
#endif
}
|
||||
|
||||
static inline void sock_rps_delete_flow(const struct sock *sk)
|
||||
static inline void _sock_rps_delete_flow(const struct sock *sk)
|
||||
{
|
||||
#ifdef CONFIG_RPS
|
||||
struct rps_sock_flow_table *table;
|
||||
u32 hash, index;
|
||||
|
||||
if (!static_branch_unlikely(&rfs_needed))
|
||||
return;
|
||||
|
||||
hash = READ_ONCE(sk->sk_rxhash);
|
||||
if (!hash)
|
||||
return;
|
||||
|
|
@ -147,6 +135,45 @@ static inline void sock_rps_delete_flow(const struct sock *sk)
|
|||
WRITE_ONCE(table->ents[index], RPS_NO_CPU);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#endif /* CONFIG_RPS */
|
||||
|
||||
/**
 * rfs_is_needed - test the rfs_needed static branch
 *
 * Return: the result of static_branch_unlikely(&rfs_needed) when
 * CONFIG_RPS is set; always false when it is not.
 */
static inline bool rfs_is_needed(void)
{
#ifdef CONFIG_RPS
	return static_branch_unlikely(&rfs_needed);
#else
	return false;
#endif
}
|
||||
|
||||
/**
 * sock_rps_record_flow_hash - record a flow hash when RFS is needed
 * @hash: flow hash passed on to _sock_rps_record_flow_hash()
 *
 * Returns without doing anything when rfs_is_needed() is false. Without
 * CONFIG_RPS the body is compiled out entirely.
 */
static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_record_flow_hash(hash);
#endif
}
|
||||
|
||||
/**
 * sock_rps_record_flow - record the flow of a socket when RFS is needed
 * @sk: socket passed on to _sock_rps_record_flow()
 *
 * Returns without doing anything when rfs_is_needed() is false. Without
 * CONFIG_RPS the body is compiled out entirely.
 */
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_record_flow(sk);
#endif
}
|
||||
|
||||
/**
 * sock_rps_delete_flow - drop the recorded flow of a socket when RFS is needed
 * @sk: socket passed on to _sock_rps_delete_flow()
 *
 * Returns without doing anything when rfs_is_needed() is false. Without
 * CONFIG_RPS the body is compiled out entirely.
 */
static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	if (!rfs_is_needed())
		return;

	_sock_rps_delete_flow(sk);
#endif
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@
|
|||
|
||||
#include <linux/kernel.h>
|
||||
#include <crypto/sha2.h>
|
||||
#include <linux/unaligned.h>
|
||||
|
||||
#include "protocol.h"
|
||||
|
||||
|
|
@ -43,39 +42,9 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
|
|||
|
||||
/* Compute the HMAC-SHA256 of @msg (@len bytes) and store it in @hmac.
 *
 * The HMAC key is the 16-byte concatenation of @key1 and @key2, each
 * converted to big-endian byte order.
 *
 * NOTE(review): @hmac presumably needs room for SHA256_DIGEST_SIZE bytes;
 * confirm against the hmac_sha256_usingrawkey() prototype.
 */
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
{
	__be64 key[2] = { cpu_to_be64(key1), cpu_to_be64(key2) };

	hmac_sha256_usingrawkey((const u8 *)key, sizeof(key), msg, len, hmac);
}
|
||||
|
||||
#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST)
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/sched/signal.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <net/aligned_data.h>
|
||||
#include <net/rps.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/inet_common.h>
|
||||
#include <net/inet_hashtables.h>
|
||||
|
|
@ -1740,6 +1741,20 @@ static u32 mptcp_send_limit(const struct sock *sk)
|
|||
return limit - not_sent;
|
||||
}
|
||||
|
||||
static void mptcp_rps_record_subflows(const struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
|
||||
if (!rfs_is_needed())
|
||||
return;
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
sock_rps_record_flow(ssk);
|
||||
}
|
||||
}
|
||||
|
||||
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
|
@ -1753,6 +1768,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
|||
|
||||
lock_sock(sk);
|
||||
|
||||
mptcp_rps_record_subflows(msk);
|
||||
|
||||
if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
|
||||
msg->msg_flags & MSG_FASTOPEN)) {
|
||||
int copied_syn = 0;
|
||||
|
|
@ -2131,6 +2148,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
|||
goto out_err;
|
||||
}
|
||||
|
||||
mptcp_rps_record_subflows(msk);
|
||||
|
||||
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
|
||||
|
||||
len = min_t(size_t, len, INT_MAX);
|
||||
|
|
@ -3922,6 +3941,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
|
|||
mptcp_sock_graft(ssk, newsock);
|
||||
}
|
||||
|
||||
mptcp_rps_record_subflows(msk);
|
||||
|
||||
/* Do late cleanup for the first subflow as necessary. Also
|
||||
* deal with bad peers not doing a complete shutdown.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -74,6 +74,17 @@ unset join_create_err
|
|||
unset join_bind_err
|
||||
unset join_connect_err
|
||||
|
||||
unset fb_ns1
|
||||
unset fb_ns2
|
||||
unset fb_infinite_map_tx
|
||||
unset fb_dss_corruption
|
||||
unset fb_simult_conn
|
||||
unset fb_mpc_passive
|
||||
unset fb_mpc_active
|
||||
unset fb_mpc_data
|
||||
unset fb_md5_sig
|
||||
unset fb_dss
|
||||
|
||||
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
|
||||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
|
||||
CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
|
||||
|
|
@ -1399,6 +1410,115 @@ chk_join_tx_nr()
|
|||
print_results "join Tx" ${rc}
|
||||
}
|
||||
|
||||
# Compare the MPTCP fallback MIB counters of one netns with their expected
# values.
#
# $1: name of the variable holding the netns (e.g. "ns1"). It is dereferenced
#     with ${!ns} when reading the counters and printed as-is in messages.
#
# Expected values are taken from the fb_* variables, each defaulting to 0.
#
# Returns KSFT_PASS when every counter matches. A mismatch sets the result to
# KSFT_FAIL and reports it; a counter for which mptcp_lib_get_counter prints
# nothing sets it to KSFT_SKIP. When both happen, the last one wins.
chk_fallback_nr()
{
	local ns=$1
	local rc=${KSFT_PASS}
	local fb_entry fb_mib fb_exp fb_label
	local count

	# One "MIB counter|expected value|label" entry per fallback reason
	local fb_checks=(
		"MPTcpExtInfiniteMapTx|${fb_infinite_map_tx:-0}|infinite map tx"
		"MPTcpExtDSSCorruptionFallback|${fb_dss_corruption:-0}|dss corruption"
		"MPTcpExtSimultConnectFallback|${fb_simult_conn:-0}|simult conn"
		"MPTcpExtMPCapableFallbackACK|${fb_mpc_passive:-0}|mpc passive"
		"MPTcpExtMPCapableFallbackSYNACK|${fb_mpc_active:-0}|mpc active"
		"MPTcpExtMPCapableDataFallback|${fb_mpc_data:-0}|mpc data"
		"MPTcpExtMD5SigFallback|${fb_md5_sig:-0}|MD5 Sig"
		"MPTcpExtDssFallback|${fb_dss:-0}|dss"
	)

	for fb_entry in "${fb_checks[@]}"; do
		IFS='|' read -r fb_mib fb_exp fb_label <<< "${fb_entry}"

		count=$(mptcp_lib_get_counter ${!ns} "${fb_mib}")
		if [ -z "$count" ]; then
			rc=${KSFT_SKIP}
		elif [ "$count" != "$fb_exp" ]; then
			rc=${KSFT_FAIL}
			print_check "$ns $fb_label fallback"
			fail_test "got $count $fb_label fallback[s] in $ns expected $fb_exp"
		fi
	done

	return $rc
}
|
||||
|
||||
# Run chk_fallback_nr for ns1, then for ns2.
#
# When fb_ns1 / fb_ns2 is non-empty, its content (e.g. "fb_dss=1") is
# evaluated as variable assignments prefixed to the corresponding
# chk_fallback_nr call, so the overrides apply to that netns only.
#
# A "fallback" result line is printed only when the outcome is not KSFT_PASS;
# the last non-zero status returned by chk_fallback_nr is the one reported.
chk_fallback_nr_all()
{
	local netns=("ns1" "ns2")
	local fb_ns=("fb_ns1" "fb_ns2")
	local rc=${KSFT_PASS}
	# Keep the loop index local so a caller's own 'i' is not clobbered
	local i

	for i in 0 1; do
		if [ -n "${!fb_ns[i]}" ]; then
			eval "${!fb_ns[i]}" \
				chk_fallback_nr ${netns[i]} || rc=${?}
		else
			chk_fallback_nr ${netns[i]} || rc=${?}
		fi
	done

	if [ "${rc}" != "${KSFT_PASS}" ]; then
		print_results "fallback" ${rc}
	fi
}
|
||||
|
||||
chk_join_nr()
|
||||
{
|
||||
local syn_nr=$1
|
||||
|
|
@ -1484,6 +1604,8 @@ chk_join_nr()
|
|||
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
|
||||
chk_join_tx_nr
|
||||
|
||||
chk_fallback_nr_all
|
||||
|
||||
if $validate_checksum; then
|
||||
chk_csum_nr $csum_ns1 $csum_ns2
|
||||
chk_fail_nr $fail_nr $fail_nr
|
||||
|
|
@ -3337,6 +3459,7 @@ fail_tests()
|
|||
join_csum_ns1=+1 join_csum_ns2=+0 \
|
||||
join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
|
||||
join_corrupted_pkts="$(pedit_action_pkts)" \
|
||||
fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \
|
||||
chk_join_nr 0 0 0
|
||||
chk_fail_nr 1 -1 invert
|
||||
fi
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user