mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 10:04:04 +02:00
Merge branch 'libbpf-fix-usdt-sib-argument-handling-causing-unrecognized-register-error'
Jiawei Zhao says:
====================
libbpf: fix USDT SIB argument handling causing unrecognized register error
When using GCC on x86-64 to compile a USDT program with -O1 or higher
optimization, the compiler will generate SIB addressing mode for global
arrays, e.g. "1@-96(%rbp,%rax,8)".
The current USDT implementation in libbpf cannot parse this argument format,
causing `bpf_program__attach_usdt()` to fail with -ENOENT
(unrecognized register).
This patch series adds support for SIB addressing mode in USDT probes.
The main changes include:
- add correct handling logic for SIB-addressed arguments in
`parse_usdt_arg`.
- add a usdt_o2 test case to cover SIB addressing mode.
Testing shows that the SIB probe correctly generates 8@(%rcx,%rax,8)
argument spec and passes all validation checks.
The modification history of this patch series:
Change since v1:
- refactor the code to make it more readable
- modify the commit message to explain why and how
Change since v2:
- fix the `scale` uninitialized error
Change since v3:
- force -O2 optimization for usdt.test.o to generate SIB addressing usdt
and pass all test cases.
Change since v4:
- split the patch into two parts, one for the fix and the other for the
test
Change since v5:
- Only enable optimization for x86 architecture to generate SIB addressing
usdt argument spec.
Change since v6:
- Add a usdt_o2 test case to cover SIB addressing mode.
- Reinstate the usdt.c test case.
Change since v7:
- Refactor modifications to __bpf_usdt_arg_spec to avoid increasing its size,
achieving better compatibility
- Fix some minor code style issues
- Refactor the usdt_o2 test case, removing semaphore and adding GCC attribute
to force -O2 optimization
Change since v8:
- Refactor the usdt_o2 test case, using assembly to force SIB addressing mode.
Change since v9:
- Only enable the usdt_o2 test case on x86_64 and i386 architectures since the
SIB addressing mode is only supported on x86_64 and i386.
Change since v10:
- Replace `__attribute__((optimize("O2")))` with `#pragma GCC optimize("O1")`
to fix the issue where the optimized compilation condition works improperly.
- Rename the usdt_o2 test case and its related files to usdt_o1, since O1
level optimization is enough to generate SIB addressing usdt argument spec.
Change since v11:
- Replace `STAP_PROBE1` with `STAP_PROBE_ASM`
- Use bit fields instead of bit shifting operations
- Merge the usdt_o1 test case into the usdt test case
Change since v12:
- This patch is the same as v12 but with a new version number.
Change since v13(resolve some review comments):
- https://lore.kernel.org/bpf/CAEf4BzZWd2zUC=U6uGJFF3EMZ7zWGLweQAG3CJWTeHy-5yFEPw@mail.gmail.com/
- https://lore.kernel.org/bpf/CAEf4Bzbs3hV_Q47+d93tTX13WkrpkpOb4=U04mZCjHyZg4aVdw@mail.gmail.com/
Change since v14:
- fix a typo in __bpf_usdt_arg_spec
Change since v15(resolve some review comments):
- https://lore.kernel.org/bpf/CAEf4BzaxuYijEfQMDFZ+CQdjxLuDZiesUXNA-SiopS+5+VxRaA@mail.gmail.com/
- https://lore.kernel.org/bpf/CAEf4BzaHi5kpuJ6OVvDU62LT5g0qHbWYMfb_FBQ3iuvvUF9fag@mail.gmail.com/
- https://lore.kernel.org/bpf/d438bf3a-a9c9-4d34-b814-63f2e9bb3a85@linux.dev/
====================
Link: https://patch.msgid.link/20250827053128.1301287-1-phoenix500526@163.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
commit
4c229f337e
|
|
@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
|
|||
BPF_USDT_ARG_CONST,
|
||||
BPF_USDT_ARG_REG,
|
||||
BPF_USDT_ARG_REG_DEREF,
|
||||
BPF_USDT_ARG_SIB,
|
||||
};
|
||||
|
||||
/*
|
||||
* This struct layout is designed specifically to be backwards/forward
|
||||
* compatible between libbpf versions for ARG_CONST, ARG_REG, and
|
||||
* ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
|
||||
*/
|
||||
struct __bpf_usdt_arg_spec {
|
||||
/* u64 scalar interpreted depending on arg_type, see below */
|
||||
__u64 val_off;
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
/* arg location case, see bpf_usdt_arg() for details */
|
||||
enum __bpf_usdt_arg_type arg_type;
|
||||
enum __bpf_usdt_arg_type arg_type: 8;
|
||||
/* index register offset within struct pt_regs */
|
||||
__u16 idx_reg_off: 12;
|
||||
/* log2 of the index register scale factor (0..3 for scale 1, 2, 4, or 8) */
|
||||
__u16 scale_bitshift: 4;
|
||||
/* reserved for future use, keeps reg_off offset stable */
|
||||
__u8 __reserved: 8;
|
||||
#else
|
||||
__u8 __reserved: 8;
|
||||
__u16 idx_reg_off: 12;
|
||||
__u16 scale_bitshift: 4;
|
||||
enum __bpf_usdt_arg_type arg_type: 8;
|
||||
#endif
|
||||
/* offset of referenced register within struct pt_regs */
|
||||
short reg_off;
|
||||
/* whether arg should be interpreted as signed value */
|
||||
|
|
@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
|
|||
{
|
||||
struct __bpf_usdt_spec *spec;
|
||||
struct __bpf_usdt_arg_spec *arg_spec;
|
||||
unsigned long val;
|
||||
unsigned long val, idx;
|
||||
int err, spec_id;
|
||||
|
||||
*res = 0;
|
||||
|
|
@ -202,6 +221,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
|
|||
return err;
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
val >>= arg_spec->arg_bitshift;
|
||||
#endif
|
||||
break;
|
||||
case BPF_USDT_ARG_SIB:
|
||||
/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
|
||||
* (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
|
||||
* fetch the base register contents and the index register
|
||||
* contents from pt_regs. Then we calculate the final address
|
||||
* as base + (index * scale) + offset, and do a user-space
|
||||
* probe read to fetch the argument value.
|
||||
*/
|
||||
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
|
||||
if (err)
|
||||
return err;
|
||||
err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
|
||||
if (err)
|
||||
return err;
|
||||
err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
|
||||
if (err)
|
||||
return err;
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
val >>= arg_spec->arg_bitshift;
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -200,12 +200,23 @@ enum usdt_arg_type {
|
|||
USDT_ARG_CONST,
|
||||
USDT_ARG_REG,
|
||||
USDT_ARG_REG_DEREF,
|
||||
USDT_ARG_SIB,
|
||||
};
|
||||
|
||||
/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
|
||||
struct usdt_arg_spec {
|
||||
__u64 val_off;
|
||||
enum usdt_arg_type arg_type;
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
enum usdt_arg_type arg_type: 8;
|
||||
__u16 idx_reg_off: 12;
|
||||
__u16 scale_bitshift: 4;
|
||||
__u8 __reserved: 8; /* keep reg_off offset stable */
|
||||
#else
|
||||
__u8 __reserved: 8; /* keep reg_off offset stable */
|
||||
__u16 idx_reg_off: 12;
|
||||
__u16 scale_bitshift: 4;
|
||||
enum usdt_arg_type arg_type: 8;
|
||||
#endif
|
||||
short reg_off;
|
||||
bool arg_signed;
|
||||
char arg_bitshift;
|
||||
|
|
@ -1283,11 +1294,51 @@ static int calc_pt_regs_off(const char *reg_name)
|
|||
|
||||
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
|
||||
{
|
||||
char reg_name[16];
|
||||
int len, reg_off;
|
||||
long off;
|
||||
char reg_name[16] = {0}, idx_reg_name[16] = {0};
|
||||
int len, reg_off, idx_reg_off, scale = 1;
|
||||
long off = 0;
|
||||
|
||||
if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
|
||||
if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
|
||||
arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
|
||||
sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
|
||||
arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
|
||||
sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
|
||||
arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
|
||||
sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
|
||||
arg_sz, reg_name, idx_reg_name, &len) == 3
|
||||
) {
|
||||
/*
|
||||
* Scale Index Base case:
|
||||
* 1@-96(%rbp,%rax,8)
|
||||
* 1@(%rbp,%rax,8)
|
||||
* 1@-96(%rbp,%rax)
|
||||
* 1@(%rbp,%rax)
|
||||
*/
|
||||
arg->arg_type = USDT_ARG_SIB;
|
||||
arg->val_off = off;
|
||||
|
||||
reg_off = calc_pt_regs_off(reg_name);
|
||||
if (reg_off < 0)
|
||||
return reg_off;
|
||||
arg->reg_off = reg_off;
|
||||
|
||||
idx_reg_off = calc_pt_regs_off(idx_reg_name);
|
||||
if (idx_reg_off < 0)
|
||||
return idx_reg_off;
|
||||
arg->idx_reg_off = idx_reg_off;
|
||||
|
||||
/* validate scale factor and set fields directly */
|
||||
switch (scale) {
|
||||
case 1: arg->scale_bitshift = 0; break;
|
||||
case 2: arg->scale_bitshift = 1; break;
|
||||
case 4: arg->scale_bitshift = 2; break;
|
||||
case 8: arg->scale_bitshift = 3; break;
|
||||
default:
|
||||
pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
|
||||
arg_sz, &off, reg_name, &len) == 3) {
|
||||
/* Memory dereference case, e.g., -4@-20(%rbp) */
|
||||
arg->arg_type = USDT_ARG_REG_DEREF;
|
||||
arg->val_off = off;
|
||||
|
|
@ -1306,6 +1357,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
|
|||
} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
|
||||
/* Register read case, e.g., -4@%eax */
|
||||
arg->arg_type = USDT_ARG_REG;
|
||||
/* register read has no memory offset */
|
||||
arg->val_off = 0;
|
||||
|
||||
reg_off = calc_pt_regs_off(reg_name);
|
||||
|
|
|
|||
|
|
@ -40,12 +40,72 @@ static void __always_inline trigger_func(int x) {
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
/*
|
||||
* SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
|
||||
* - 'size' is the size in bytes of the array element, and its sign indicates
|
||||
* whether the type is signed (negative) or unsigned (positive).
|
||||
* - 'base_reg' is the register holding the base address, normally rdx or edx
|
||||
* - 'index_reg' is the register holding the index, normally rax or eax
|
||||
* - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
|
||||
* size of the element type.
|
||||
*
|
||||
* For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
|
||||
* - size: -2 (negative because 'short' is signed)
|
||||
* - scale: 2 (since sizeof(short) == 2)
|
||||
*
|
||||
* The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
|
||||
*/
|
||||
static volatile short array[] = {-1, -2, -3, -4};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
|
||||
#else
|
||||
#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
|
||||
#endif
|
||||
|
||||
unsigned short test_usdt_sib_semaphore SEC(".probes");
|
||||
|
||||
static void trigger_sib_spec(void)
|
||||
{
|
||||
/*
|
||||
* Force SIB addressing with inline assembly.
|
||||
*
|
||||
* You must compile with -std=gnu99 or -std=c99 to use the
|
||||
* STAP_PROBE_ASM macro.
|
||||
*
|
||||
* The STAP_PROBE_ASM macro generates a quoted string that gets
|
||||
* inserted between the surrounding assembly instructions. In this
|
||||
* case, USDT_SIB_ARG_SPEC is embedded directly into the instruction
|
||||
* stream, creating a probe point between the asm statement boundaries.
|
||||
* It works fine with gcc/clang.
|
||||
*
|
||||
* Register constraints:
|
||||
* - "d"(array): Binds the 'array' variable to %rdx or %edx register
|
||||
* - "a"(0): Binds the constant 0 to %rax or %eax register
|
||||
* These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and
|
||||
* %%rax(%eax), they contain the expected values for SIB addressing.
|
||||
*
|
||||
* The "memory" clobber prevents the compiler from reordering memory
|
||||
* accesses around the probe point, ensuring that the probe behavior
|
||||
* is predictable and consistent.
|
||||
*/
|
||||
asm volatile(
|
||||
STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
|
||||
:
|
||||
: "d"(array), "a"(0)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void subtest_basic_usdt(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_usdt_opts, opts);
|
||||
struct test_usdt *skel;
|
||||
struct test_usdt__bss *bss;
|
||||
int err, i;
|
||||
const __u64 expected_cookie = 0xcafedeadbeeffeed;
|
||||
|
||||
skel = test_usdt__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open"))
|
||||
|
|
@ -59,20 +119,29 @@ static void subtest_basic_usdt(void)
|
|||
goto cleanup;
|
||||
|
||||
/* usdt0 won't be auto-attached */
|
||||
opts.usdt_cookie = 0xcafedeadbeeffeed;
|
||||
opts.usdt_cookie = expected_cookie;
|
||||
skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
|
||||
0 /*self*/, "/proc/self/exe",
|
||||
"test", "usdt0", &opts);
|
||||
if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
|
||||
goto cleanup;
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
opts.usdt_cookie = expected_cookie;
|
||||
skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
|
||||
0 /*self*/, "/proc/self/exe",
|
||||
"test", "usdt_sib", &opts);
|
||||
if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
|
||||
goto cleanup;
|
||||
#endif
|
||||
|
||||
trigger_func(1);
|
||||
|
||||
ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
|
||||
ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
|
||||
ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
|
||||
|
||||
ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
|
||||
ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
|
||||
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
|
||||
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
|
||||
ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
|
||||
|
|
@ -156,6 +225,16 @@ static void subtest_basic_usdt(void)
|
|||
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
|
||||
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
trigger_sib_spec();
|
||||
ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
|
||||
ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
|
||||
ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
|
||||
ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
|
||||
ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
|
||||
ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
|
||||
#endif
|
||||
|
||||
cleanup:
|
||||
test_usdt__destroy(skel);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,4 +107,35 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int usdt_sib_called;
|
||||
u64 usdt_sib_cookie;
|
||||
int usdt_sib_arg_cnt;
|
||||
int usdt_sib_arg_ret;
|
||||
short usdt_sib_arg;
|
||||
int usdt_sib_arg_size;
|
||||
|
||||
/*
|
||||
* usdt_sib is only tested on x86-related architectures, so it requires
|
||||
* manual attach since auto-attach will panic tests under other architectures
|
||||
*/
|
||||
SEC("usdt")
|
||||
int usdt_sib(struct pt_regs *ctx)
|
||||
{
|
||||
long tmp;
|
||||
|
||||
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
|
||||
return 0;
|
||||
|
||||
__sync_fetch_and_add(&usdt_sib_called, 1);
|
||||
|
||||
usdt_sib_cookie = bpf_usdt_cookie(ctx);
|
||||
usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
|
||||
|
||||
usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
|
||||
usdt_sib_arg = (short)tmp;
|
||||
usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user