Merge branch 'bpf-allow-void-cast-using-bpf_rdonly_cast'

Eduard Zingerman says:

====================
bpf: allow void* cast using bpf_rdonly_cast()

Currently, pointers returned by `bpf_rdonly_cast()` have a type of
"pointer to btf id", and only casts to structure types are allowed.
Access to memory pointed to by these pointers is done through
`BPF_PROBE_{MEM,MEMSX}` instructions and does not produce errors on
invalid memory access.

This patch set extends `bpf_rdonly_cast()` to allow casts to an
equivalent of 'void *', effectively replacing
`bpf_probe_read_kernel()` calls in situations where access to
individual bytes or integers is necessary.

The mechanism was suggested and explored by Andrii Nakryiko in [1].

To help with detecting support for this feature, an
`enum bpf_features` is added with intended usage as follows:

  if (bpf_core_enum_value_exists(enum bpf_features,
                                 BPF_FEAT_RDONLY_CAST_TO_VOID))
    ...

[1] https://github.com/anakryiko/linux/tree/bpf-mem-cast

Changelog:

v2: https://lore.kernel.org/bpf/20250625000520.2700423-1-eddyz87@gmail.com/
v2 -> v3:
- dropped direct numbering for __MAX_BPF_FEAT.

v1: https://lore.kernel.org/bpf/20250624191009.902874-1-eddyz87@gmail.com/
v1 -> v2:
- renamed BPF_FEAT_TOTAL to __MAX_BPF_FEAT and moved patch introducing
  bpf_features enum to the start of the series (Alexei);
- dropped patch #3, which allowed opting out of the CAP_SYS_ADMIN drop
  in prog_tests/verifier.c; a separate runner in prog_tests/* is used
  instead.
====================

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://patch.msgid.link/20250625182414.30659-1-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2025-06-25 15:13:16 -07:00
commit 0ed5f79987
3 changed files with 212 additions and 12 deletions

View File

@ -44,6 +44,11 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#undef BPF_LINK_TYPE
};
/*
 * Feature markers that BPF programs can probe for by name, e.g. with
 * bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_...).
 */
enum bpf_features {
	/* bpf_rdonly_cast() accepts a cast to void (type ID 0). */
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,

	/* Not a feature: one past the last defined value, keep it last. */
	__MAX_BPF_FEAT,
};
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
@ -7535,6 +7540,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
}
} else if (base_type(reg->type) == PTR_TO_MEM) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
if (type_may_be_null(reg->type)) {
verbose(env, "R%d invalid mem access '%s'\n", regno,
@ -7554,8 +7560,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
err = check_mem_region_access(env, regno, off, size,
reg->mem_size, false);
/*
* Accesses to untrusted PTR_TO_MEM are done through probe
* instructions, hence no need to check bounds in that case.
*/
if (!rdonly_untrusted)
err = check_mem_region_access(env, regno, off, size,
reg->mem_size, false);
if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
@ -13602,16 +13613,24 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
regs[BPF_REG_0].btf_id = meta->ret_btf_id;
} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
if (!ret_t || !btf_type_is_struct(ret_t)) {
if (!ret_t) {
verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
meta->arg_constant.value);
return -EINVAL;
} else if (btf_type_is_struct(ret_t)) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].btf_id = meta->arg_constant.value;
} else if (btf_type_is_void(ret_t)) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
regs[BPF_REG_0].mem_size = 0;
} else {
verbose(env,
"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
return -EINVAL;
}
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].btf_id = meta->arg_constant.value;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);
@ -14410,6 +14429,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
}
/*
* Accesses to untrusted PTR_TO_MEM are done through probe
* instructions, hence no need to track offsets.
*/
if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
return 0;
switch (base_type(ptr_reg->type)) {
case PTR_TO_CTX:
case PTR_TO_MAP_VALUE:
@ -19618,10 +19644,27 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
!reg_type_mismatch_ok(prev));
}
/*
 * Return true when the base type of @type, as computed by base_type(),
 * is either PTR_TO_MEM or PTR_TO_BTF_ID; false for every other base type.
 */
static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
{
	return base_type(type) == PTR_TO_MEM ||
	       base_type(type) == PTR_TO_BTF_ID;
}
/*
 * Return true when the base type of @type, as computed by base_type(),
 * is PTR_TO_MEM.
 */
static bool is_ptr_to_mem(enum bpf_reg_type type)
{
	switch (base_type(type)) {
	case PTR_TO_MEM:
		return true;
	default:
		return false;
	}
}
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
bool allow_trust_mismatch)
{
enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
enum bpf_reg_type merged_type;
if (*prev_type == NOT_INIT) {
/* Saw a valid insn
@ -19638,15 +19681,24 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ
* Reject it.
*/
if (allow_trust_mismatch &&
base_type(type) == PTR_TO_BTF_ID &&
base_type(*prev_type) == PTR_TO_BTF_ID) {
is_ptr_to_mem_or_btf_id(type) &&
is_ptr_to_mem_or_btf_id(*prev_type)) {
/*
* Have to support a use case when one path through
* the program yields TRUSTED pointer while another
* is UNTRUSTED. Fallback to UNTRUSTED to generate
* BPF_PROBE_MEM/BPF_PROBE_MEMSX.
* The MEM_RDONLY flag is merged the same way.
*/
*prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
merged_type = PTR_TO_MEM;
else
merged_type = PTR_TO_BTF_ID;
if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
merged_type |= PTR_UNTRUSTED;
if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
merged_type |= MEM_RDONLY;
*prev_type = merged_type;
} else {
verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
@ -21254,6 +21306,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
* for this case.
*/
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
if (type == BPF_READ) {
if (BPF_MODE(insn->code) == BPF_MEM)
insn->code = BPF_LDX | BPF_PROBE_MEM |
@ -24439,6 +24492,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
u32 log_true_size;
bool is_priv;
BTF_TYPE_EMIT(enum bpf_features);
/* no program is valid */
if (ARRAY_SIZE(bpf_verifier_ops) == 0)
return -EINVAL;

View File

@ -0,0 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <test_progs.h>
#include "mem_rdonly_untrusted.skel.h"
/*
 * prog_tests entry point: hands the mem_rdonly_untrusted skeleton to
 * RUN_TESTS(). Presumably each program in that skeleton is then checked
 * against its __success/__failure/__msg/__retval annotations -- confirm
 * against the RUN_TESTS definition.
 */
void test_mem_rdonly_untrusted(void)
{
RUN_TESTS(mem_rdonly_untrusted);
}

View File

@ -0,0 +1,136 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
#include "../test_kmods/bpf_testmod_kfunc.h"
/*
 * Load through the result of bpf_rdonly_cast(0, 0) at offset 0x7fff.
 * The annotations expect the program to pass verification and to return 0.
 * When enum bpf_features has no BPF_FEAT_RDONLY_CAST_TO_VOID value the
 * program returns 42 instead.
 * NOTE(review): 42 does not match __retval(0), so this presumably fails on
 * kernels that lack the feature -- confirm that this is intended.
 */
SEC("socket")
__success
__retval(0)
int ldx_is_ok_bad_addr(void *ctx)
{
char *p;
if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID))
return 42;
p = bpf_rdonly_cast(0, 0);
return p[0x7fff];
}
/*
 * Cast the address of a local variable holding 1 with bpf_rdonly_cast(&v, 0)
 * and read it back through the returned pointer. The annotations expect the
 * program to pass verification and to return 1.
 */
SEC("socket")
__success
__retval(1)
int ldx_is_ok_good_addr(void *ctx)
{
int v, *p;
v = 1;
p = bpf_rdonly_cast(&v, 0);
return *p;
}
/*
 * Increment a pointer obtained from bpf_rdonly_cast(0, 0) inside a bpf_for
 * loop bounded by 1000 * 1000 * 1000 and load through it on every iteration,
 * summing the loaded values. The annotation only expects verification to
 * succeed; no return value is checked.
 */
SEC("socket")
__success
int offset_not_tracked(void *ctx)
{
int *p, i, s;
p = bpf_rdonly_cast(0, 0);
s = 0;
bpf_for(i, 0, 1000 * 1000 * 1000) {
p++;
s += *p;
}
return s;
}
/*
 * Plain store through a pointer returned by bpf_rdonly_cast(&v, 0).
 * The annotations expect verification to fail with
 * "cannot write into rdonly_untrusted_mem".
 */
SEC("socket")
__failure
__msg("cannot write into rdonly_untrusted_mem")
int stx_not_ok(void *ctx)
{
int v, *p;
v = 1;
p = bpf_rdonly_cast(&v, 0);
*p = 1;
return 0;
}
/*
 * Atomic add (__sync_fetch_and_add) through a pointer returned by
 * bpf_rdonly_cast(&v, 0). The annotations expect verification to fail with
 * "cannot write into rdonly_untrusted_mem".
 */
SEC("socket")
__failure
__msg("cannot write into rdonly_untrusted_mem")
int atomic_not_ok(void *ctx)
{
int v, *p;
v = 1;
p = bpf_rdonly_cast(&v, 0);
__sync_fetch_and_add(p, 1);
return 0;
}
/*
 * Compare-and-swap (__sync_val_compare_and_swap) through a pointer returned
 * by bpf_rdonly_cast(&v, 0). The annotations expect verification to fail
 * with "cannot write into rdonly_untrusted_mem".
 */
SEC("socket")
__failure
__msg("cannot write into rdonly_untrusted_mem")
int atomic_rmw_not_ok(void *ctx)
{
long v, *p;
v = 1;
p = bpf_rdonly_cast(&v, 0);
return __sync_val_compare_and_swap(p, 0, 42);
}
/*
 * Pass a pointer returned by bpf_rdonly_cast(0, 0) as the argument of the
 * kfunc bpf_kfunc_trusted_num_test(). The annotations expect verification
 * to fail, with the log reporting a 4-byte access against mem_size=0 and
 * R1 being outside of the allowed memory range.
 */
SEC("socket")
__failure
__msg("invalid access to memory, mem_size=0 off=0 size=4")
__msg("R1 min value is outside of the allowed memory range")
int kfunc_param_not_ok(void *ctx)
{
int *p;
p = bpf_rdonly_cast(0, 0);
bpf_kfunc_trusted_num_test(p);
return 0;
}
/*
 * Pass a pointer returned by bpf_rdonly_cast(0, 0) as the first argument of
 * the bpf_copy_from_user() helper. The annotations expect verification to
 * fail with a message that names R1's type as rdonly_untrusted_mem and
 * lists the types the helper expected instead.
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure
__msg("R1 type=rdonly_untrusted_mem expected=")
int helper_param_not_ok(void *ctx)
{
char *p;
p = bpf_rdonly_cast(0, 0);
/*
 * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do,
 * the most permissive constraint
 */
bpf_copy_from_user(p, 0, (void *)42);
return 0;
}
/*
 * Return one of two differently typed casts of address 0, selected by
 * bpf_get_prandom_u32(): a cast to the kernel type ID of struct sock when
 * the random value is non-zero, otherwise a cast with type ID 0.
 */
static __noinline u64 *get_some_addr(void)
{
if (bpf_get_prandom_u32())
return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock));
else
return bpf_rdonly_cast(0, 0);
}
/*
 * Dereference the pointer returned by get_some_addr(), so the single load
 * below may see either of the two cast results that function produces.
 * The annotations expect the program to pass verification and to return 0.
 */
SEC("socket")
__success
__retval(0)
int mixed_mem_type(void *ctx)
{
u64 *p;
/* Try to avoid compiler hoisting load to if branches by using __noinline func. */
p = get_some_addr();
return *p;
}
char _license[] SEC("license") = "GPL";