Merge branch 'libbpf-fix-usdt-sib-argument-handling-causing-unrecognized-register-error'

Jiawei Zhao says:

====================
libbpf: fix USDT SIB argument handling causing unrecognized register error

When GCC compiles a USDT program for x86-64 with -O1 or higher
optimization, it generates SIB (Scale-Index-Base) addressing for accesses
to global arrays, e.g. "1@-96(%rbp,%rax,8)".

The current USDT implementation in libbpf cannot parse SIB-addressed
argument specs, causing `bpf_program__attach_usdt()` to fail with -ENOENT
(unrecognized register).

This patch series adds support for SIB addressing mode in USDT probes.
The main changes include:
- add correct handling logic for SIB-addressed arguments in
  `parse_usdt_arg`.
- add a usdt_o2 test case to cover SIB addressing mode.

Testing shows that the SIB probe correctly generates 8@(%rcx,%rax,8)
argument spec and passes all validation checks.

The modification history of this patch series:
Change since v1:
- refactor the code to make it more readable
- modify the commit message to explain why and how

Change since v2:
- fix the `scale` uninitialized error

Change since v3:
- force -O2 optimization for usdt.test.o to generate SIB addressing usdt
  and pass all test cases.

Change since v4:
- split the patch into two parts, one for the fix and the other for the
  test

Change since v5:
- Only enable optimization for x86 architecture to generate SIB addressing
  usdt argument spec.

Change since v6:
- Add an usdt_o2 test case to cover SIB addressing mode.
- Reinstate the usdt.c test case.

Change since v7:
- Refactor modifications to __bpf_usdt_arg_spec to avoid increasing its size,
  achieving better compatibility
- Fix some minor code style issues
- Refactor the usdt_o2 test case, removing semaphore and adding GCC attribute
  to force -O2 optimization

Change since v8:
- Refactor the usdt_o2 test case, using assembly to force SIB addressing mode.

Change since v9:
- Only enable the usdt_o2 test case on x86_64 and i386 architectures since the
  SIB addressing mode is only supported on x86_64 and i386.

Change since v10:
- Replace `__attribute__((optimize("O2")))` with `#pragma GCC optimize("O1")`
  to fix the issue where the optimized compilation condition works improperly.
- Rename the usdt_o2 test case and its related files to usdt_o1, since O1-level
  optimization is enough to generate a SIB-addressed USDT argument spec.

Change since v11:
- Replace `STAP_PROBE1` with `STAP_PROBE_ASM`
- Use bit fields instead of bit shifting operations
- Merge the usdt_o1 test case into the usdt test case

Change since v12:
- This version is the same as v12, only with a new version number.

Change since v13(resolve some review comments):
- https://lore.kernel.org/bpf/CAEf4BzZWd2zUC=U6uGJFF3EMZ7zWGLweQAG3CJWTeHy-5yFEPw@mail.gmail.com/
- https://lore.kernel.org/bpf/CAEf4Bzbs3hV_Q47+d93tTX13WkrpkpOb4=U04mZCjHyZg4aVdw@mail.gmail.com/

Change since v14:
- fix a typo in __bpf_usdt_arg_spec

Change since v15(resolve some review comments):
- https://lore.kernel.org/bpf/CAEf4BzaxuYijEfQMDFZ+CQdjxLuDZiesUXNA-SiopS+5+VxRaA@mail.gmail.com/
- https://lore.kernel.org/bpf/CAEf4BzaHi5kpuJ6OVvDU62LT5g0qHbWYMfb_FBQ3iuvvUF9fag@mail.gmail.com/
- https://lore.kernel.org/bpf/d438bf3a-a9c9-4d34-b814-63f2e9bb3a85@linux.dev/
====================

Link: https://patch.msgid.link/20250827053128.1301287-1-phoenix500526@163.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
This commit is contained in:
Andrii Nakryiko 2025-08-27 15:41:05 -07:00
commit 4c229f337e
4 changed files with 211 additions and 9 deletions

View File

@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
BPF_USDT_ARG_CONST,
BPF_USDT_ARG_REG,
BPF_USDT_ARG_REG_DEREF,
BPF_USDT_ARG_SIB,
};
/*
* This struct layout is designed specifically to be backwards/forward
* compatible between libbpf versions for ARG_CONST, ARG_REG, and
* ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
*/
struct __bpf_usdt_arg_spec {
/* u64 scalar interpreted depending on arg_type, see below */
__u64 val_off;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* arg location case, see bpf_usdt_arg() for details */
enum __bpf_usdt_arg_type arg_type;
enum __bpf_usdt_arg_type arg_type: 8;
/* index register offset within struct pt_regs */
__u16 idx_reg_off: 12;
/* scale factor for index register (1, 2, 4, or 8) */
__u16 scale_bitshift: 4;
/* reserved for future use, keeps reg_off offset stable */
__u8 __reserved: 8;
#else
__u8 __reserved: 8;
__u16 idx_reg_off: 12;
__u16 scale_bitshift: 4;
enum __bpf_usdt_arg_type arg_type: 8;
#endif
/* offset of referenced register within struct pt_regs */
short reg_off;
/* whether arg should be interpreted as signed value */
@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
struct __bpf_usdt_spec *spec;
struct __bpf_usdt_arg_spec *arg_spec;
unsigned long val;
unsigned long val, idx;
int err, spec_id;
*res = 0;
@ -202,6 +221,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
val >>= arg_spec->arg_bitshift;
#endif
break;
case BPF_USDT_ARG_SIB:
/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
* (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
* fetch the base register contents and the index register
* contents from pt_regs. Then we calculate the final address
* as base + (index * scale) + offset, and do a user-space
* probe read to fetch the argument value.
*/
err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
if (err)
return err;
err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
if (err)
return err;
err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
if (err)
return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
val >>= arg_spec->arg_bitshift;
#endif
break;
default:

View File

@ -200,12 +200,23 @@ enum usdt_arg_type {
USDT_ARG_CONST,
USDT_ARG_REG,
USDT_ARG_REG_DEREF,
USDT_ARG_SIB,
};
/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
struct usdt_arg_spec {
__u64 val_off;
enum usdt_arg_type arg_type;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
enum usdt_arg_type arg_type: 8;
__u16 idx_reg_off: 12;
__u16 scale_bitshift: 4;
__u8 __reserved: 8; /* keep reg_off offset stable */
#else
__u8 __reserved: 8; /* keep reg_off offset stable */
__u16 idx_reg_off: 12;
__u16 scale_bitshift: 4;
enum usdt_arg_type arg_type: 8;
#endif
short reg_off;
bool arg_signed;
char arg_bitshift;
@ -1283,11 +1294,51 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
char reg_name[16];
int len, reg_off;
long off;
char reg_name[16] = {0}, idx_reg_name[16] = {0};
int len, reg_off, idx_reg_off, scale = 1;
long off = 0;
if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
arg_sz, reg_name, idx_reg_name, &len) == 3
) {
/*
* Scale Index Base case:
* 1@-96(%rbp,%rax,8)
* 1@(%rbp,%rax,8)
* 1@-96(%rbp,%rax)
* 1@(%rbp,%rax)
*/
arg->arg_type = USDT_ARG_SIB;
arg->val_off = off;
reg_off = calc_pt_regs_off(reg_name);
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
idx_reg_off = calc_pt_regs_off(idx_reg_name);
if (idx_reg_off < 0)
return idx_reg_off;
arg->idx_reg_off = idx_reg_off;
/* validate scale factor and set fields directly */
switch (scale) {
case 1: arg->scale_bitshift = 0; break;
case 2: arg->scale_bitshift = 1; break;
case 4: arg->scale_bitshift = 2; break;
case 8: arg->scale_bitshift = 3; break;
default:
pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
return -EINVAL;
}
} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -4@-20(%rbp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@ -1306,6 +1357,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
/* register read has no memory offset */
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);

View File

@ -40,12 +40,72 @@ static void __always_inline trigger_func(int x) {
}
}
#if defined(__x86_64__) || defined(__i386__)
/*
* SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
* - 'size' is the size in bytes of the array element, and its sign indicates
* whether the type is signed (negative) or unsigned (positive).
* - 'base_reg' is the register holding the base address, normally rdx or edx
* - 'index_reg' is the register holding the index, normally rax or eax
* - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
* size of the element type.
*
* For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
* - size: -2 (negative because 'short' is signed)
* - scale: 2 (since sizeof(short) == 2)
*
* The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
*/
static volatile short array[] = {-1, -2, -3, -4};
#if defined(__x86_64__)
#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
#else
#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
#endif
unsigned short test_usdt_sib_semaphore SEC(".probes");
/*
* Fire the "test:usdt_sib" USDT probe with a single argument described by
* USDT_SIB_ARG_SPEC. With the operands bound below, the spec resolves to
* array + 0 * 2, i.e. the address of array[0].
*/
static void trigger_sib_spec(void)
{
/*
* Force SIB addressing with inline assembly instead of depending on
* the compiler to choose that addressing mode on its own.
*
* Build note: STAP_PROBE_ASM requires compiling with -std=gnu99 or
* -std=c99.
*
* STAP_PROBE_ASM expands to a quoted string that is placed into the
* asm statement as its template, so USDT_SIB_ARG_SPEC is recorded
* verbatim as the probe's argument spec. This works with both gcc
* and clang.
*
* Register constraints:
* - "d"(array): loads the address of 'array' into %rdx (or %edx)
* - "a"(0): loads the constant 0 into %rax (or %eax)
* USDT_SIB_ARG_SPEC names exactly these registers as base and index,
* so they must hold the expected values when the probe fires.
*
* The "memory" clobber keeps the compiler from reordering memory
* accesses across the probe point, so the contents of 'array' are
* settled before the probe is hit.
*/
asm volatile(
STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
:
: "d"(array), "a"(0)
: "memory"
);
}
#endif
static void subtest_basic_usdt(void)
{
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
int err, i;
const __u64 expected_cookie = 0xcafedeadbeeffeed;
skel = test_usdt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@ -59,20 +119,29 @@ static void subtest_basic_usdt(void)
goto cleanup;
/* usdt0 won't be auto-attached */
opts.usdt_cookie = 0xcafedeadbeeffeed;
opts.usdt_cookie = expected_cookie;
skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
0 /*self*/, "/proc/self/exe",
"test", "usdt0", &opts);
if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
goto cleanup;
#if defined(__x86_64__) || defined(__i386__)
opts.usdt_cookie = expected_cookie;
skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
0 /*self*/, "/proc/self/exe",
"test", "usdt_sib", &opts);
if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
goto cleanup;
#endif
trigger_func(1);
ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
@ -156,6 +225,16 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
#if defined(__x86_64__) || defined(__i386__)
trigger_sib_spec();
ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
#endif
cleanup:
test_usdt__destroy(skel);
}

View File

@ -107,4 +107,35 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
return 0;
}
/* Results recorded by the usdt_sib program. */

/* Number of usdt_sib hits from a task whose tgid equals my_pid */
int usdt_sib_called;
/* Value returned by bpf_usdt_cookie() on the most recent hit */
u64 usdt_sib_cookie;
/* Value returned by bpf_usdt_arg_cnt() on the most recent hit */
int usdt_sib_arg_cnt;
/* Return code of bpf_usdt_arg() for argument 0 */
int usdt_sib_arg_ret;
/* Argument 0 as fetched by bpf_usdt_arg(), truncated to short */
short usdt_sib_arg;
/* Value returned by bpf_usdt_arg_size() for argument 0 */
int usdt_sib_arg_size;
/*
* usdt_sib is only tested on x86-related architectures, so it requires
* manual attach since auto-attach will panic tests under other architectures
*/
SEC("usdt")
int usdt_sib(struct pt_regs *ctx)
{
	long raw_arg;

	/*
	 * Record results only when the tgid (upper 32 bits of pid_tgid)
	 * of the current task equals my_pid; ignore every other hit.
	 */
	if ((bpf_get_current_pid_tgid() >> 32) == my_pid) {
		__sync_fetch_and_add(&usdt_sib_called, 1);

		usdt_sib_cookie = bpf_usdt_cookie(ctx);
		usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);

		/* Fetch argument 0 and keep both the status and the value. */
		usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &raw_arg);
		usdt_sib_arg = (short)raw_arg;

		usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
	}

	return 0;
}
char _license[] SEC("license") = "GPL";