mirror of
https://github.com/torvalds/linux.git
synced 2026-06-03 12:03:54 +02:00
Merge branch 'bpf-skmsg-fix-verdict-sk_data_ready-racing-with-ktls-rx'
Xingwang Xiang says:
====================
bpf, skmsg: fix verdict sk_data_ready racing with ktls rx
sk_psock_verdict_data_ready() lacks the tls_sw_has_ctx_rx() guard that
sk_psock_strp_data_ready() gained in e91de6afa81c. When a socket is
inserted into a sockmap (BPF_SK_SKB_VERDICT) before TLS RX is configured,
the missing guard causes tcp_read_skb() to drain sk_receive_queue without
advancing copied_seq, leaving a dangling frag_list pointer that
tls_decrypt_sg() walks — a use-after-free.
Patch 1 mirrors the fix from e91de6afa81c: add the tls_sw_has_ctx_rx()
check to sk_psock_verdict_data_ready() so that when a TLS RX context is
present the function defers to psock->saved_data_ready (sock_def_readable)
instead of calling tcp_read_skb().
Patch 2 adds a selftest that drives the vulnerable sequence end-to-end
and verifies recv() returns the correct decrypted data.
====================
Link: https://patch.msgid.link/20260517145630.20521-1-v3rdant.xiang@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
4a2844dcc0
|
|
@ -1268,12 +1268,19 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
|
|||
static void sk_psock_verdict_data_ready(struct sock *sk)
|
||||
{
|
||||
const struct proto_ops *ops = NULL;
|
||||
struct sk_psock *psock;
|
||||
struct socket *sock;
|
||||
int copied;
|
||||
|
||||
trace_sk_data_ready(sk);
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (psock && tls_sw_has_ctx_rx(sk)) {
|
||||
psock->saved_data_ready(sk);
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
sock = READ_ONCE(sk->sk_socket);
|
||||
if (likely(sock))
|
||||
ops = READ_ONCE(sock->ops);
|
||||
|
|
@ -1283,8 +1290,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
|
|||
|
||||
copied = ops->read_skb(sk, sk_psock_verdict_recv);
|
||||
if (copied >= 0) {
|
||||
struct sk_psock *psock;
|
||||
|
||||
rcu_read_lock();
|
||||
psock = sk_psock(sk);
|
||||
if (psock)
|
||||
|
|
|
|||
|
|
@ -417,6 +417,107 @@ static void run_tests(int family, enum bpf_map_type map_type)
|
|||
close(map);
|
||||
}
|
||||
|
||||
/*
|
||||
* Regression test for the KTLS + sockmap (verdict) reverse-order UAF.
|
||||
*
|
||||
* Vulnerable sequence:
|
||||
* 1. Insert receiver socket into sockmap with BPF_SK_SKB_VERDICT program.
|
||||
* sk->sk_data_ready becomes sk_psock_verdict_data_ready.
|
||||
* 2. Configure TLS RX: tls_sw_strparser_arm() saves
|
||||
* sk_psock_verdict_data_ready as rx_ctx->saved_data_ready.
|
||||
*
|
||||
* When data arrives, tls_rx_msg_ready() calls saved_data_ready() =
|
||||
* sk_psock_verdict_data_ready(), which calls tcp_read_skb() and drains
|
||||
* sk_receive_queue via __skb_unlink() without advancing copied_seq.
|
||||
* tls_strp_msg_load() then finds the queue empty while tcp_inq() is still
|
||||
* non-zero, hits WARN_ON_ONCE(!first), and leaves a dangling frag_list
|
||||
* pointer that tls_decrypt_sg() walks — a use-after-free.
|
||||
*
|
||||
* The fix adds a tls_sw_has_ctx_rx() check to sk_psock_verdict_data_ready(),
|
||||
* mirroring what sk_psock_strp_data_ready() already does: when a TLS RX
|
||||
* context is present, defer to psock->saved_data_ready (sock_def_readable)
|
||||
* instead of calling tcp_read_skb(), so TLS retains sole ownership of the
|
||||
* receive queue. Data is then decrypted and returned correctly by
|
||||
* tls_sw_recvmsg().
|
||||
*/
|
||||
static void test_sockmap_ktls_verdict_with_tls_rx(int family, int sotype)
|
||||
{
|
||||
struct tls12_crypto_info_aes_gcm_128 crypto_info = {};
|
||||
char send_buf[] = "hello ktls sockmap reverse order";
|
||||
char recv_buf[sizeof(send_buf)] = {};
|
||||
struct test_sockmap_ktls *skel;
|
||||
int c = -1, p = -1, zero = 0;
|
||||
int prog_fd, map_fd;
|
||||
ssize_t n;
|
||||
int err;
|
||||
|
||||
skel = test_sockmap_ktls__open_and_load();
|
||||
if (!ASSERT_TRUE(skel, "open_and_load"))
|
||||
return;
|
||||
|
||||
err = create_pair(family, sotype, &c, &p);
|
||||
if (!ASSERT_OK(err, "create_pair"))
|
||||
goto out;
|
||||
|
||||
prog_fd = bpf_program__fd(skel->progs.prog_skb_verdict_pass);
|
||||
map_fd = bpf_map__fd(skel->maps.sock_map_verdict);
|
||||
|
||||
err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_SKB_VERDICT, 0);
|
||||
if (!ASSERT_OK(err, "bpf_prog_attach sk_skb verdict"))
|
||||
goto out;
|
||||
|
||||
/* Step 1: configure TLS TX on sender (no sockmap involvement) */
|
||||
err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
|
||||
if (!ASSERT_OK(err, "setsockopt(TCP_ULP) client"))
|
||||
goto out;
|
||||
|
||||
crypto_info.info.version = TLS_1_2_VERSION;
|
||||
crypto_info.info.cipher_type = TLS_CIPHER_AES_GCM_128;
|
||||
memset(crypto_info.key, 0x01, sizeof(crypto_info.key));
|
||||
memset(crypto_info.salt, 0x02, sizeof(crypto_info.salt));
|
||||
|
||||
err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info));
|
||||
if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
|
||||
goto out;
|
||||
|
||||
/* Step 2: insert receiver into sockmap BEFORE TLS RX */
|
||||
err = bpf_map_update_elem(map_fd, &zero, &p, BPF_NOEXIST);
|
||||
if (!ASSERT_OK(err, "bpf_map_update_elem"))
|
||||
goto out;
|
||||
|
||||
/* Step 3: configure TLS RX AFTER sockmap insertion */
|
||||
err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
|
||||
if (!ASSERT_OK(err, "setsockopt(TCP_ULP) server"))
|
||||
goto out;
|
||||
|
||||
err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info));
|
||||
if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* A buggy kernel hits WARN_ON_ONCE in tls_strp_load_anchor_with_queue
|
||||
* and may UAF in tls_decrypt_sg here. With the fix,
|
||||
* sk_psock_verdict_data_ready defers to sock_def_readable and TLS
|
||||
* decrypts the record normally.
|
||||
*/
|
||||
n = send(c, send_buf, sizeof(send_buf), 0);
|
||||
if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "send"))
|
||||
goto out;
|
||||
|
||||
n = recv_timeout(p, recv_buf, sizeof(recv_buf), 0, 5);
|
||||
if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "recv"))
|
||||
goto out;
|
||||
|
||||
ASSERT_OK(memcmp(send_buf, recv_buf, sizeof(send_buf)), "data integrity");
|
||||
|
||||
out:
|
||||
if (c != -1)
|
||||
close(c);
|
||||
if (p != -1)
|
||||
close(p);
|
||||
test_sockmap_ktls__destroy(skel);
|
||||
}
|
||||
|
||||
static void run_ktls_test(int family, int sotype)
|
||||
{
|
||||
if (test__start_subtest("tls simple offload"))
|
||||
|
|
@ -429,6 +530,8 @@ static void run_ktls_test(int family, int sotype)
|
|||
test_sockmap_ktls_tx_no_buf(family, sotype, true);
|
||||
if (test__start_subtest("tls tx with pop"))
|
||||
test_sockmap_ktls_tx_pop(family, sotype);
|
||||
if (test__start_subtest("tls verdict with tls rx"))
|
||||
test_sockmap_ktls_verdict_with_tls_rx(family, sotype);
|
||||
}
|
||||
|
||||
void test_sockmap_ktls(void)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,13 @@ struct {
|
|||
__type(value, int);
|
||||
} sock_map SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_SOCKMAP);
|
||||
__uint(max_entries, 2);
|
||||
__type(key, int);
|
||||
__type(value, int);
|
||||
} sock_map_verdict SEC(".maps");
|
||||
|
||||
SEC("sk_msg")
|
||||
int prog_sk_policy(struct sk_msg_md *msg)
|
||||
{
|
||||
|
|
@ -38,3 +45,17 @@ int prog_sk_policy_redir(struct sk_msg_md *msg)
|
|||
bpf_msg_apply_bytes(msg, apply_bytes);
|
||||
return bpf_msg_redirect_map(msg, &sock_map, two, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verdict program for the reverse-order TLS/sockmap regression test.
|
||||
* Returns SK_PASS so tcp_read_skb() drains the receive queue via
|
||||
* sk_psock_verdict_recv() without calling tcp_eat_skb(), which is
|
||||
* the precondition for the KTLS strparser frag_list UAF.
|
||||
*/
|
||||
SEC("sk_skb/verdict")
|
||||
int prog_skb_verdict_pass(struct __sk_buff *skb)
|
||||
{
|
||||
return SK_PASS;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user