Merge branch 'bpf-report-arena-faults-to-bpf-streams'

Puranjay Mohan says:

====================
bpf: report arena faults to BPF streams

Changes in v6->v7:
v6: https://lore.kernel.org/all/20250908163638.23150-1-puranjay@kernel.org/
- Added comments about the usage of arena_reg in x86 and arm64 jits. (Alexei)
- Used clear_lo32() for clearing the lower 32-bits of user_vm_start. (Alexei)
- Moved update of the old tests to use __stderr to a separate commit (Eduard)
- Used test__skip() in prog_tests/stream.c (Eduard)
- Start a sub-test for read / write

Changes in v5->v6:
v5: https://lore.kernel.org/all/20250901193730.43543-1-puranjay@kernel.org/
- Introduces __stderr and __stdout for easy testing of bpf streams
  (Eduard)
- Add more test cases for arena fault reporting (subprog and callback)
- Fix main_prog_aux usage and return main_prog from find_from_stack_cb
  (Kumar)
- Properly fix the build issue reported by kernel test robot

Changes in v4->v5:
v4: https://lore.kernel.org/all/20250827153728.28115-1-puranjay@kernel.org/
- Added patch 2 to introduce main_prog_aux for easier access to
  streams.
- Fixed bug in fault handlers when arena_reg == dst_reg
- Updated selftest to check the above edge case.
- Added comments about the usage of barrier_var() in code and commit
  message.

Changes in v3->v4:
v3: https://lore.kernel.org/all/20250827150113.15763-1-puranjay@kernel.org/
- Fixed a build issue when CONFIG_BPF_JIT=y and # CONFIG_BPF_SYSCALL is
  not set

Changes in v2->v3:
v2: https://lore.kernel.org/all/20250811111828.13836-1-puranjay@kernel.org/
- Improved the selftest to check the exact fault address
- Dropped BPF_NO_KFUNC_PROTOTYPES and bpf_arena_alloc/free_pages() usage
- Rebased on bpf-next/master

Changes in v1->v2:
v1: https://lore.kernel.org/all/20250806085847.18633-1-puranjay@kernel.org/
- Changed variable and mask names for consistency (Yonghong)
- Added Acked-by: Yonghong Song <yonghong.song@linux.dev> on two patches

This set adds the support of reporting page faults inside arena to BPF
stderr stream. The reported address is the one that a user would expect
to see if they pass it to bpf_printk();

Here is an example output from the stderr stream and bpf_printk()

ERROR: Arena WRITE access at unmapped address 0xdeaddead0000
CPU: 9 UID: 0 PID: 502 Comm: test_progs
Call trace:
bpf_stream_stage_dump_stack+0xc0/0x150
bpf_prog_report_arena_violation+0x98/0xf0
ex_handler_bpf+0x5c/0x78
fixup_exception+0xf8/0x160
__do_kernel_fault+0x40/0x188
do_bad_area+0x70/0x88
do_translation_fault+0x54/0x98
do_mem_abort+0x4c/0xa8
el1_abort+0x44/0x70
el1h_64_sync_handler+0x50/0x108
el1h_64_sync+0x6c/0x70
bpf_prog_a64a9778d31b8e88_stream_arena_write_fault+0x84/0xc8
  *(page) = 1; @ stream.c:100
bpf_prog_test_run_syscall+0x100/0x328
__sys_bpf+0x508/0xb98
__arm64_sys_bpf+0x2c/0x48
invoke_syscall+0x50/0x120
el0_svc_common.constprop.0+0x48/0xf8
do_el0_svc+0x28/0x40
el0_svc+0x48/0xf8
el0t_64_sync_handler+0xa0/0xe8
el0t_64_sync+0x198/0x1a0

Same address is printed by bpf_printk():

1389.078831: bpf_trace_printk: Read Address: 0xdeaddead0000

To make this possible, some extra metadata has to be passed to the bpf
exception handler, so the bpf exception handling mechanism for both
x86-64 and arm64 has been improved in this set.

The streams selftest has been updated to test this new feature.
====================

Link: https://patch.msgid.link/20250911145808.58042-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2025-09-11 13:00:44 -07:00
commit a578b54a8a
10 changed files with 491 additions and 112 deletions

View File

@ -1066,19 +1066,53 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
emit(A64_RET(A64_LR), ctx);
}
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
/*
* Metadata encoding for exception handling in JITed code.
*
* Format of `fixup` field in `struct exception_table_entry`:
*
* Bit layout of `fixup` (32-bit):
*
* +-----------+--------+-----------+-----------+----------+
* | 31-27 | 26-22 | 21 | 20-16 | 15-0 |
* | | | | | |
* | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG | OFFSET |
* +-----------+--------+-----------+-----------+----------+
*
* - OFFSET (16 bits): Offset used to compute address for Load/Store instruction.
* - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when
* accessing the arena region.
* - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
* - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to
* DONT_CLEAR if it is a store instruction.
*/
#define BPF_FIXUP_OFFSET_MASK GENMASK(15, 0)
#define BPF_FIXUP_ARENA_REG_MASK GENMASK(20, 16)
#define BPF_ARENA_ACCESS BIT(21)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup);
bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS);
bool is_write = (dst_reg == DONT_CLEAR);
unsigned long addr;
if (is_arena) {
addr = regs->regs[arena_reg] + off;
bpf_prog_report_arena_violation(is_write, addr, regs->pc);
}
if (dst_reg != DONT_CLEAR)
regs->regs[dst_reg] = 0;
regs->pc = (unsigned long)&ex->fixup - offset;
/* Skip the faulting instruction */
regs->pc += AARCH64_INSN_SIZE;
return true;
}
@ -1088,7 +1122,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
int dst_reg)
{
off_t ins_offset;
off_t fixup_offset;
s16 off = insn->off;
bool is_arena;
int arena_reg;
unsigned long pc;
struct exception_table_entry *ex;
@ -1102,6 +1138,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
return 0;
is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
(BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);
if (!ctx->prog->aux->extable ||
WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
return -EINVAL;
@ -1119,22 +1158,6 @@ static int add_exception_handler(const struct bpf_insn *insn,
if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
return -ERANGE;
/*
* Since the extable follows the program, the fixup offset is always
* negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
* to keep things simple, and put the destination register in the upper
* bits. We don't need to worry about buildtime or runtime sort
* modifying the upper bits because the table is already sorted, and
* isn't part of the main exception table.
*
* The fixup_offset is set to the next instruction from the instruction
* that may fault. The execution will jump to this after handling the
* fault.
*/
fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
return -ERANGE;
/*
* The offsets above have been calculated using the RO buffer but we
* need to use the R/W buffer for writes.
@ -1147,8 +1170,26 @@ static int add_exception_handler(const struct bpf_insn *insn,
if (BPF_CLASS(insn->code) != BPF_LDX)
dst_reg = DONT_CLEAR;
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
if (is_arena) {
ex->fixup |= BPF_ARENA_ACCESS;
/*
* insn->src_reg/dst_reg holds the address in the arena region with upper 32-bits
* being zero because of a preceding addr_space_cast(r<n>, 0x0, 0x1) instruction.
* This address is adjusted with the addition of arena_vm_start (see the
* implementation of BPF_PROBE_MEM32 and BPF_PROBE_ATOMIC) before being used for the
* memory access. Pass the reg holding the unmodified 32-bit address to
* ex_handler_bpf.
*/
if (BPF_CLASS(insn->code) == BPF_LDX)
arena_reg = bpf2a64[insn->src_reg];
else
arena_reg = bpf2a64[insn->dst_reg];
ex->fixup |= FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) |
FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg);
}
ex->type = EX_TYPE_BPF;

View File

@ -8,6 +8,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
@ -1388,16 +1389,67 @@ static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
return 0;
}
/*
* Metadata encoding for exception handling in JITed code.
*
* Format of `fixup` and `data` fields in `struct exception_table_entry`:
*
* Bit layout of `fixup` (32-bit):
*
* +-----------+--------+-----------+---------+----------+
* | 31 | 30-24 | 23-16 | 15-8 | 7-0 |
* | | | | | |
* | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN |
* +-----------+--------+-----------+---------+----------+
*
* - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)).
* - DST_REG (8 bits): Offset of dst_reg from reg2pt_regs[] (max offset = 112 (fits in 8 bits)).
* This is set to DONT_CLEAR if the insn is a store.
* - ARENA_REG (8 bits): Offset of the register that is used to calculate the
* address for load/store when accessing the arena region.
* - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
*
* Bit layout of `data` (32-bit):
*
* +--------------+--------+--------------+
* | 31-16 | 15-8 | 7-0 |
* | | | |
* | ARENA_OFFSET | Unused | EX_TYPE_BPF |
* +--------------+--------+--------------+
*
* - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when
* accessing the arena region.
*/
#define DONT_CLEAR 1
#define FIXUP_INSN_LEN_MASK GENMASK(7, 0)
#define FIXUP_REG_MASK GENMASK(15, 8)
#define FIXUP_ARENA_REG_MASK GENMASK(23, 16)
#define FIXUP_ARENA_ACCESS BIT(31)
#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16)
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
u32 reg = x->fixup >> 8;
u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup);
u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup);
bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS);
bool is_write = (reg == DONT_CLEAR);
unsigned long addr;
s16 off;
u32 arena_reg;
if (is_arena) {
arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup);
off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data);
addr = *(unsigned long *)((void *)regs + arena_reg) + off;
bpf_prog_report_arena_violation(is_write, addr, regs->ip);
}
/* jump over faulting load and clear dest register */
if (reg != DONT_CLEAR)
*(unsigned long *)((void *)regs + reg) = 0;
regs->ip += x->fixup & 0xff;
regs->ip += insn_len;
return true;
}
@ -2070,6 +2122,7 @@ st: if (is_imm8(insn->off))
{
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
u32 arena_reg, fixup_reg;
s64 delta;
if (!bpf_prog->aux->extable)
@ -2089,8 +2142,29 @@ st: if (is_imm8(insn->off))
ex->data = EX_TYPE_BPF;
ex->fixup = (prog - start_of_ldx) |
((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
/*
* src_reg/dst_reg holds the address in the arena region with upper
* 32-bits being zero because of a preceding addr_space_cast(r<n>,
* 0x0, 0x1) instruction. This address is adjusted with the addition
* of arena_vm_start (see the implementation of BPF_PROBE_MEM32 and
* BPF_PROBE_ATOMIC) before being used for the memory access. Pass
* the reg holding the unmodified 32-bit address to
* ex_handler_bpf().
*/
if (BPF_CLASS(insn->code) == BPF_LDX) {
arena_reg = reg2pt_regs[src_reg];
fixup_reg = reg2pt_regs[dst_reg];
} else {
arena_reg = reg2pt_regs[dst_reg];
fixup_reg = DONT_CLEAR;
}
ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg) |
FIELD_PREP(FIXUP_REG_MASK, fixup_reg);
ex->fixup |= FIXUP_ARENA_ACCESS;
ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off);
}
break;
@ -2208,7 +2282,8 @@ st: if (is_imm8(insn->off))
* End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
* of 4 bytes will be ignored and rbx will be zero inited.
*/
ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
FIELD_PREP(FIXUP_REG_MASK, reg2pt_regs[dst_reg]);
}
break;

View File

@ -1633,6 +1633,7 @@ struct bpf_prog_aux {
/* function name for valid attach_btf_id */
const char *attach_func_name;
struct bpf_prog **func;
struct bpf_prog_aux *main_prog_aux;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
struct bpf_kfunc_desc_tab *kfunc_tab;
@ -2880,6 +2881,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@ -3167,6 +3169,11 @@ static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
}
/*
 * Stub for builds without CONFIG_BPF_SYSCALL: reporting an arena fault is a
 * no-op, so callers do not need their own #ifdef guards.
 */
static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr,
unsigned long fault_ip)
{
}
#endif /* CONFIG_BPF_SYSCALL */
static __always_inline int

View File

@ -633,3 +633,33 @@ static int __init kfunc_init(void)
return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);
/*
 * Report an arena access fault on the BPF stderr stream of the program that
 * contains fault_ip.
 *
 * @write:    selects "WRITE" (true) or "READ" (false) in the emitted message.
 * @addr:     faulting address as computed by the caller; user_vm_start of the
 *            program's arena, with its low 32 bits cleared, is added to it
 *            before printing.
 * @fault_ip: instruction address used to look up the owning BPF program.
 *
 * Returns without reporting anything when no program is found for fault_ip.
 */
void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip)
{
struct bpf_stream_stage ss;
struct bpf_prog *prog;
u64 user_vm_start;
/*
* The RCU read lock is held to safely traverse the latch tree, but we
* don't need its protection when accessing the prog, since it will not
* disappear while we are handling the fault.
*/
rcu_read_lock();
prog = bpf_prog_ksym_find(fault_ip);
rcu_read_unlock();
if (!prog)
return;
/* Use main prog for stream access */
prog = prog->aux->main_prog_aux->prog;
user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
/* Combine the upper half of user_vm_start with the address passed in. */
addr += clear_lo32(user_vm_start);
/* Emit the error line followed by a stack dump to the prog's stderr stream. */
bpf_stream_stage(ss, prog, BPF_STDERR, ({
bpf_stream_printk(ss, "ERROR: Arena %s access at unmapped address 0x%lx\n",
write ? "WRITE" : "READ", addr);
bpf_stream_dump_stack(ss);
}));
}

View File

@ -120,6 +120,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->pages = size / PAGE_SIZE;
fp->aux = aux;
fp->aux->main_prog_aux = aux;
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
fp->blinding_requested = bpf_jit_blinding_enabled(fp);
@ -3297,9 +3298,8 @@ static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp)
rcu_read_unlock();
if (!prog)
return true;
if (bpf_is_subprog(prog))
return true;
ctxp->prog = prog;
/* Make sure we return the main prog if we found a subprog */
ctxp->prog = prog->aux->main_prog_aux->prog;
return false;
}

View File

@ -21601,6 +21601,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
func[i]->aux->poke_tab = prog->aux->poke_tab;
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
func[i]->aux->main_prog_aux = prog->aux;
for (j = 0; j < prog->aux->size_poke_tab; j++) {
struct bpf_jit_poke_descriptor *poke;

View File

@ -2,7 +2,6 @@
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <sys/mman.h>
#include <regex.h>
#include "stream.skel.h"
#include "stream_fail.skel.h"
@ -18,87 +17,6 @@ void test_stream_success(void)
return;
}
struct {
int prog_off;
const char *errstr;
} stream_error_arr[] = {
{
offsetof(struct stream, progs.stream_cond_break),
"ERROR: Timeout detected for may_goto instruction\n"
"CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
"Call trace:\n"
"([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*",
},
{
offsetof(struct stream, progs.stream_deadlock),
"ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n"
"Attempted lock = (0x[0-9a-fA-F]+)\n"
"Total held locks = 1\n"
"Held lock\\[ 0\\] = \\1\n" // Lock address must match
"CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
"Call trace:\n"
"([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*",
},
};
static int match_regex(const char *pattern, const char *string)
{
int err, rc;
regex_t re;
err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE);
if (err)
return -1;
rc = regexec(&re, string, 0, NULL, 0);
regfree(&re);
return rc == 0 ? 1 : 0;
}
void test_stream_errors(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
struct stream *skel;
int ret, prog_fd;
char buf[1024];
skel = stream__open_and_load();
if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
return;
for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) {
struct bpf_program **prog;
prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off);
prog_fd = bpf_program__fd(*prog);
ret = bpf_prog_test_run_opts(prog_fd, &opts);
ASSERT_OK(ret, "ret");
ASSERT_OK(opts.retval, "retval");
#if !defined(__x86_64__) && !defined(__s390x__) && !defined(__aarch64__)
ASSERT_TRUE(1, "Timed may_goto unsupported, skip.");
if (i == 0) {
ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts);
ASSERT_EQ(ret, 0, "stream read");
continue;
}
#endif
ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
ASSERT_GT(ret, 0, "stream read");
ASSERT_LE(ret, 1023, "len for buf");
buf[ret] = '\0';
ret = match_regex(stream_error_arr[i].errstr, buf);
if (!ASSERT_TRUE(ret == 1, "regex match"))
fprintf(stderr, "Output from stream:\n%s\n", buf);
}
stream__destroy(skel);
}
void test_stream_syscall(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
@ -139,3 +57,52 @@ void test_stream_syscall(void)
stream__destroy(skel);
}
/*
 * Run @prog once and check that its BPF stderr stream mentions the address
 * stored in *@fault_addr_p (formatted as "0x%lx").
 *
 * The stream is read into a fixed-size local buffer. The read is limited to
 * sizeof(buf) - 1 bytes and the function bails out when the read fails, so
 * the NUL terminator is never written outside of the buffer.
 */
static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p)
{
	LIBBPF_OPTS(bpf_test_run_opts, opts);
	LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
	int ret, prog_fd;
	char fault_addr[64];
	char buf[1024];

	prog_fd = bpf_program__fd(prog);

	ret = bpf_prog_test_run_opts(prog_fd, &opts);
	ASSERT_OK(ret, "ret");
	ASSERT_OK(opts.retval, "retval");

	/* The program stores the expected address before it faults. */
	snprintf(fault_addr, sizeof(fault_addr), "0x%lx", *fault_addr_p);

	/* Reserve the last byte of buf for the terminator. */
	ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf) - 1, &ropts);
	if (!ASSERT_GT(ret, 0, "stream read"))
		return;
	if (!ASSERT_LE(ret, (int)sizeof(buf) - 1, "len for buf"))
		return;
	buf[ret] = '\0';

	if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) {
		fprintf(stderr, "Output from stream:\n%s\n", buf);
		fprintf(stderr, "Fault Addr: %s\n", fault_addr);
	}
}
/*
 * Check that arena read and write faults report the expected fault address
 * on the BPF stderr stream. Only runs on x86-64 and arm64; the test is
 * skipped on every other architecture.
 */
void test_stream_arena_fault_address(void)
{
#if defined(__x86_64__) || defined(__aarch64__)
	struct stream *skel = stream__open_and_load();

	if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
		return;

	/* Both programs publish the expected address through the same global. */
	if (test__start_subtest("read_fault"))
		test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr);

	if (test__start_subtest("write_fault"))
		test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr);

	stream__destroy(skel);
#else
	printf("%s:SKIP: arena fault reporting not supported\n", __func__);
	test__skip();
#endif
}

View File

@ -35,6 +35,12 @@
* inside the brackets.
* __msg_unpriv Same as __msg but for unprivileged mode.
*
* __stderr Message expected to be found in bpf stderr stream. The
* same regex rules apply like __msg.
* __stderr_unpriv Same as __stderr but for unprivileged mode.
* __stdout Same as __stderr but for stdout stream.
* __stdout_unpriv Same as __stdout but for unprivileged mode.
*
* __xlated Expect a line in a disassembly log after verifier applies rewrites.
* Multiple __xlated attributes could be specified.
* Regular expressions could be specified same way as in __msg.
@ -140,6 +146,10 @@
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg)))
#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12

View File

@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_experimental.h"
#include "bpf_arena_common.h"
struct arr_elem {
struct bpf_res_spin_lock lock;
@ -17,10 +18,29 @@ struct {
__type(value, struct arr_elem);
} arrmap SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 1); /* number of pages */
} arena SEC(".maps");
struct elem {
struct bpf_timer timer;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__type(key, int);
__type(value, struct elem);
} array SEC(".maps");
#define ENOSPC 28
#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
int size;
u64 fault_addr;
void *arena_ptr;
SEC("syscall")
__success __retval(0)
@ -37,7 +57,15 @@ int stream_exhaust(void *ctx)
}
SEC("syscall")
__arch_x86_64
__arch_arm64
__arch_s390x
__success __retval(0)
__stderr("ERROR: Timeout detected for may_goto instruction")
__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
__stderr("Call trace:\n"
"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*}}")
int stream_cond_break(void *ctx)
{
while (can_loop)
@ -47,6 +75,15 @@ int stream_cond_break(void *ctx)
SEC("syscall")
__success __retval(0)
__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock")
__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n"
"Total held locks = 1\n"
"Held lock\\[ 0\\] = \\1}}")
__stderr("...")
__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
__stderr("Call trace:\n"
"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*}}")
int stream_deadlock(void *ctx)
{
struct bpf_res_spin_lock *lock, *nlock;
@ -76,4 +113,125 @@ int stream_syscall(void *ctx)
return 0;
}
/*
 * Trigger a faulting 32-bit store into the arena at offset 0x7fff from
 * user_vm_start and publish the expected fault address in the global
 * fault_addr. The __stderr annotations below describe the report expected
 * on the BPF stderr stream.
 */
SEC("syscall")
__arch_x86_64
__arch_arm64
__success __retval(0)
__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
__stderr("Call trace:\n"
"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*}}")
int stream_arena_write_fault(void *ctx)
{
struct bpf_arena *ptr = (void *)&arena;
u64 user_vm_start;
/* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
* triggers bounds checking since the map definition is smaller than struct
* bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
* bounds analysis
*/
barrier_var(ptr);
user_vm_start = ptr->user_vm_start;
/* Record the address the report is expected to contain. */
fault_addr = user_vm_start + 0x7fff;
bpf_addr_space_cast(user_vm_start, 0, 1);
/* Store through r1 with the same 0x7fff offset used for fault_addr. */
asm volatile (
"r1 = %0;"
"r2 = 1;"
"*(u32 *)(r1 + 0x7fff) = r2;"
:
: "r" (user_vm_start)
: "r1", "r2"
);
return 0;
}
/*
 * Trigger a faulting 32-bit load from the arena at offset 0x7fff from
 * user_vm_start and publish the expected fault address in the global
 * fault_addr. The load uses r1 as both address and destination register.
 */
SEC("syscall")
__arch_x86_64
__arch_arm64
__success __retval(0)
__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}")
__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
__stderr("Call trace:\n"
"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*}}")
int stream_arena_read_fault(void *ctx)
{
struct bpf_arena *ptr = (void *)&arena;
u64 user_vm_start;
/* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
* triggers bounds checking since the map definition is smaller than struct
* bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
* bounds analysis
*/
barrier_var(ptr);
user_vm_start = ptr->user_vm_start;
/* Record the address the report is expected to contain. */
fault_addr = user_vm_start + 0x7fff;
bpf_addr_space_cast(user_vm_start, 0, 1);
/* Source and destination are both r1 (address register == dst register). */
asm volatile (
"r1 = %0;"
"r1 = *(u32 *)(r1 + 0x7fff);"
:
: "r" (user_vm_start)
: "r1"
);
return 0;
}
/*
 * Non-inlined helper that writes 1 through an arena pointer with the fixed
 * value 0xdeadbeef, so the access happens inside a subprogram.
 */
static __noinline void subprog(void)
{
int __arena *addr = (int __arena *)0xdeadbeef;
/* NOTE(review): presumably stored so this program references the arena map — confirm. */
arena_ptr = &arena;
*addr = 1;
}
/*
 * Calls subprog(), which performs the arena write; the __stderr annotations
 * expect an arena WRITE report on the BPF stderr stream.
 */
SEC("syscall")
__arch_x86_64
__arch_arm64
__success __retval(0)
__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
__stderr("Call trace:\n"
"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*}}")
int stream_arena_subprog_fault(void *ctx)
{
subprog();
return 0;
}
/*
 * Timer callback that writes 1 through an arena pointer with the fixed value
 * 0xdeadbeef, so the access happens in callback context. Always returns 0.
 */
static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer)
{
int __arena *addr = (int __arena *)0xdeadbeef;
/* NOTE(review): presumably stored so this program references the arena map — confirm. */
arena_ptr = &arena;
*addr = 1;
return 0;
}
/*
 * Arms the timer stored in element 0 of the "array" map with timer_cb as its
 * callback; the __stderr annotations expect the arena WRITE report produced
 * by that callback. Returns 0 even when the map lookup fails.
 */
SEC("syscall")
__arch_x86_64
__arch_arm64
__success __retval(0)
__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
__stderr("Call trace:\n"
"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*}}")
int stream_arena_callback_fault(void *ctx)
{
struct bpf_timer *arr_timer;
arr_timer = bpf_map_lookup_elem(&array, &(int){0});
if (!arr_timer)
return 0;
/* Return values of the timer helpers are not checked here. */
bpf_timer_init(arr_timer, &array, 1);
bpf_timer_set_callback(arr_timer, timer_cb);
/* Start with a zero timeout and no flags. */
bpf_timer_start(arr_timer, 0, 0);
return 0;
}
char _license[] SEC("license") = "GPL";

View File

@ -38,6 +38,10 @@
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr="
#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xbadcafe
@ -79,6 +83,8 @@ struct test_subspec {
struct expected_msgs expect_msgs;
struct expected_msgs expect_xlated;
struct expected_msgs jited;
struct expected_msgs stderr;
struct expected_msgs stdout;
int retval;
bool execute;
__u64 caps;
@ -139,6 +145,10 @@ static void free_test_spec(struct test_spec *spec)
free_msgs(&spec->unpriv.expect_xlated);
free_msgs(&spec->priv.jited);
free_msgs(&spec->unpriv.jited);
free_msgs(&spec->unpriv.stderr);
free_msgs(&spec->priv.stderr);
free_msgs(&spec->unpriv.stdout);
free_msgs(&spec->priv.stdout);
free(spec->priv.name);
free(spec->unpriv.name);
@ -407,6 +417,10 @@ static int parse_test_spec(struct test_loader *tester,
bool xlated_on_next_line = true;
bool unpriv_jit_on_next_line;
bool jit_on_next_line;
bool stderr_on_next_line = true;
bool unpriv_stderr_on_next_line = true;
bool stdout_on_next_line = true;
bool unpriv_stdout_on_next_line = true;
bool collect_jit = false;
int func_id, i, err = 0;
u32 arch_mask = 0;
@ -598,6 +612,26 @@ static int parse_test_spec(struct test_loader *tester,
err = -EINVAL;
goto cleanup;
}
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) {
err = push_disasm_msg(msg, &stderr_on_next_line,
&spec->priv.stderr);
if (err)
goto cleanup;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) {
err = push_disasm_msg(msg, &unpriv_stderr_on_next_line,
&spec->unpriv.stderr);
if (err)
goto cleanup;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) {
err = push_disasm_msg(msg, &stdout_on_next_line,
&spec->priv.stdout);
if (err)
goto cleanup;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) {
err = push_disasm_msg(msg, &unpriv_stdout_on_next_line,
&spec->unpriv.stdout);
if (err)
goto cleanup;
}
}
@ -651,6 +685,10 @@ static int parse_test_spec(struct test_loader *tester,
clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated);
if (spec->unpriv.jited.cnt == 0)
clone_msgs(&spec->priv.jited, &spec->unpriv.jited);
if (spec->unpriv.stderr.cnt == 0)
clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr);
if (spec->unpriv.stdout.cnt == 0)
clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout);
}
spec->valid = true;
@ -712,6 +750,20 @@ static void emit_jited(const char *jited, bool force)
fprintf(stdout, "JITED:\n=============\n%s=============\n", jited);
}
/*
 * Print the captured BPF stderr stream to the test runner's stdout.
 *
 * @bpf_stderr: NUL-terminated contents of the BPF stderr stream.
 * @force:      print even when verbosity is VERBOSE_NONE.
 *
 * The parameter is deliberately not called "stderr": that identifier is the
 * <stdio.h> stream (a macro in ISO C), and the sibling emit_stdout() already
 * uses the bpf_ prefix for the same reason.
 */
static void emit_stderr(const char *bpf_stderr, bool force)
{
	if (!force && env.verbosity == VERBOSE_NONE)
		return;
	fprintf(stdout, "STDERR:\n=============\n%s=============\n", bpf_stderr);
}
/*
 * Print the captured BPF stdout stream to the test runner's stdout.
 *
 * @bpf_stdout: NUL-terminated contents of the BPF stdout stream.
 * @force:      print even when verbosity is VERBOSE_NONE.
 */
static void emit_stdout(const char *bpf_stdout, bool force)
{
	bool quiet = (env.verbosity == VERBOSE_NONE);

	if (force || !quiet)
		fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout);
}
static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
void (*emit_fn)(const char *buf, bool force))
{
@ -934,6 +986,19 @@ static int get_xlated_program_text(int prog_fd, char *text, size_t text_sz)
return err;
}
/*
 * Read the BPF stream identified by @stream_id of program @prog_fd into
 * @text and NUL-terminate it.
 *
 * @text_sz is the full size of @text. At most @text_sz - 1 bytes are read so
 * that the terminator always fits. On failure @text is set to the empty
 * string and nothing is written outside of the buffer.
 *
 * Returns the number of bytes read (> 0) on success, -EINVAL for a
 * zero-sized buffer, or the non-positive value returned by
 * bpf_prog_stream_read() (a failed "stream read" assertion is recorded in
 * that case).
 */
static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz)
{
	LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
	int ret;

	if (!text_sz)
		return -EINVAL;

	/* Keep one byte in reserve for the terminating NUL. */
	ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz - 1, &ropts);
	if (!ASSERT_GT(ret, 0, "stream read")) {
		/* Never index text[] with a negative error code. */
		text[0] = '\0';
		return ret;
	}

	text[ret] = '\0';
	return ret;
}
/* this function is forced noinline and has short generic name to look better
* in test_progs output (in case of a failure)
*/
@ -1108,6 +1173,31 @@ void run_subtest(struct test_loader *tester,
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
}
if (subspec->stderr.cnt) {
err = get_stream(2, bpf_program__fd(tprog),
tester->log_buf, tester->log_buf_sz);
if (err <= 0) {
PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
err, errno);
goto tobj_cleanup;
}
emit_stderr(tester->log_buf, false /*force*/);
validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr);
}
if (subspec->stdout.cnt) {
err = get_stream(1, bpf_program__fd(tprog),
tester->log_buf, tester->log_buf_sz);
if (err <= 0) {
PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
err, errno);
goto tobj_cleanup;
}
emit_stdout(tester->log_buf, false /*force*/);
validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout);
}
/* redo bpf_map__attach_struct_ops for each test */
while (links_cnt > 0)
bpf_link__destroy(links[--links_cnt]);