mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
bpf: Reject TCP_NODELAY in bpf-tcp-cc
A BPF TCP congestion control program can call bpf_setsockopt() from
its callbacks. In current kernels, if it calls
bpf_setsockopt(TCP_NODELAY) from cwnd_event_tx_start(), the call can
re-enter the TCP transmit path before the outer tcp_transmit_skb()
has completed and advanced the send head.
This can re-trigger CA_EVENT_TX_START and lead to unbounded recursion:
tcp_transmit_skb()
-> tcp_event_data_sent()
-> tcp_ca_event(sk, CA_EVENT_TX_START)
-> cwnd_event_tx_start()
-> bpf_setsockopt(TCP_NODELAY)
-> tcp_push_pending_frames()
-> tcp_write_xmit()
-> tcp_transmit_skb()
Because each nested call re-enters the same path, the recursion never terminates and can overflow the kernel stack.
Reject TCP_NODELAY with -EOPNOTSUPP for bpf-tcp-cc by introducing
a dedicated setsockopt proto for BPF_PROG_TYPE_STRUCT_OPS TCP
congestion control programs. To keep it simple, TCP_NODELAY is
rejected for all tcp-cc ops.
Fixes: 7e41df5dbb ("bpf: Add a few optnames to bpf_setsockopt")
Suggested-by: Martin KaFai Lau <martin.lau@linux.dev>
Signed-off-by: KaFai Wan <kafai.wan@linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Link: https://patch.msgid.link/20260421155804.135786-3-kafai.wan@linux.dev
This commit is contained in:
parent
846c76ecc0
commit
54377fcab5
|
|
@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
|
|||
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
|
||||
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
|
||||
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
|
||||
/* setsockopt helper variant that rejects SOL_TCP/TCP_NODELAY with -EOPNOTSUPP; handed out to bpf-tcp-cc programs. */
extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto;
|
||||
extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto;
|
||||
extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto;
|
||||
extern const struct bpf_func_proto bpf_find_vma_proto;
|
||||
|
|
|
|||
|
|
@ -5688,6 +5688,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
|
|||
.arg5_type = ARG_CONST_SIZE,
|
||||
};
|
||||
|
||||
/*
 * setsockopt helper that forwards every option to _bpf_setsockopt()
 * except SOL_TCP/TCP_NODELAY, which is refused with -EOPNOTSUPP.
 */
BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level,
	   int, optname, char *, optval, int, optlen)
{
	/* Anything other than SOL_TCP/TCP_NODELAY takes the regular path. */
	if (level != SOL_TCP || optname != TCP_NODELAY)
		return _bpf_setsockopt(sk, level, optname, optval, optlen);

	/*
	 * Setting TCP_NODELAY triggers tcp_push_pending_frames(), which
	 * re-enters CA_EVENT_TX_START in bpf_tcp_cc, so the option is not
	 * supported through this helper.
	 */
	return -EOPNOTSUPP;
}
|
||||
|
||||
const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = {
|
||||
.func = bpf_sk_setsockopt_nodelay,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
|
||||
.arg5_type = ARG_CONST_SIZE,
|
||||
};
|
||||
|
||||
BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
|
||||
int, optname, char *, optval, int, optlen)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
|
|||
*/
|
||||
if (prog_ops_moff(prog) !=
|
||||
offsetof(struct tcp_congestion_ops, release))
|
||||
return &bpf_sk_setsockopt_proto;
|
||||
return &bpf_sk_setsockopt_nodelay_proto;
|
||||
return NULL;
|
||||
case BPF_FUNC_getsockopt:
|
||||
/* Since get/setsockopt is usually expected to
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user