Merge branch 'bpf-trampoline-support-jmp-mode'

Menglong Dong says:

====================
bpf trampoline support "jmp" mode

For now, the bpf trampoline is called by the "call" instruction. However,
it break the RSB and introduce extra overhead in x86_64 arch.

For example, we hook the function "foo" with fexit, the call and return
logic will be like this:
  call foo -> call trampoline -> call foo-body ->
  return foo-body -> return foo

As we can see above, there are 3 call, but 2 return, which break the RSB
balance. We can pseudo a "return" here, but it's not the best choice,
as it will still cause once RSB miss:
  call foo -> call trampoline -> call foo-body ->
  return foo-body -> return dummy -> return foo

The "return dummy" doesn't pair the "call trampoline", which can also
cause the RSB miss.

Therefore, we introduce the "jmp" mode for bpf trampoline, as advised by
Alexei in [1]. And the logic will become this:
  call foo -> jmp trampoline -> call foo-body ->
  return foo-body -> return foo

As we can see above, the RSB is totally balanced after this series.

In this series, we introduce the FTRACE_OPS_FL_JMP for ftrace to make it
use the "jmp" instruction instead of "call".

And we also do some adjustment to bpf_arch_text_poke() to allow us specify
the old and new poke_type.

For the BPF_TRAMP_F_SHARE_IPMODIFY case, we will fallback to the "call"
mode, as it need to get the function address from the stack, which is not
supported in "jmp" mode.

Before this series, we have the following performance with the bpf
benchmark:

  $ cd tools/testing/selftests/bpf
  $ ./benchs/run_bench_trigger.sh
  usermode-count :  890.171 ± 1.522M/s
  kernel-count   :  409.184 ± 0.330M/s
  syscall-count  :   26.792 ± 0.010M/s
  fentry         :  171.242 ± 0.322M/s
  fexit          :   80.544 ± 0.045M/s
  fmodret        :   78.301 ± 0.065M/s
  rawtp          :  192.906 ± 0.900M/s
  tp             :   81.883 ± 0.209M/s
  kprobe         :   52.029 ± 0.113M/s
  kprobe-multi   :   62.237 ± 0.060M/s
  kprobe-multi-all:    4.761 ± 0.014M/s
  kretprobe      :   23.779 ± 0.046M/s
  kretprobe-multi:   29.134 ± 0.012M/s
  kretprobe-multi-all:    3.822 ± 0.003M/

And after this series, we have the following performance:

  usermode-count :  890.443 ± 0.307M/s
  kernel-count   :  416.139 ± 0.055M/s
  syscall-count  :   31.037 ± 0.813M/s
  fentry         :  169.549 ± 0.519M/s
  fexit          :  136.540 ± 0.518M/s
  fmodret        :  159.248 ± 0.188M/s
  rawtp          :  194.475 ± 0.144M/s
  tp             :   84.505 ± 0.041M/s
  kprobe         :   59.951 ± 0.071M/s
  kprobe-multi   :   63.153 ± 0.177M/s
  kprobe-multi-all:    4.699 ± 0.012M/s
  kretprobe      :   23.740 ± 0.015M/s
  kretprobe-multi:   29.301 ± 0.022M/s
  kretprobe-multi-all:    3.869 ± 0.005M/s

As we can see above, the performance of fexit increase from 80.544M/s to
136.540M/s, and the "fmodret" increase from 78.301M/s to 159.248M/s.

Link: https://lore.kernel.org/bpf/20251117034906.32036-1-dongml2@chinatelecom.cn/
Changes since v2:
* reject if the addr is already "jmp" in register_ftrace_direct() and
  __modify_ftrace_direct() in the 1st patch.
* fix compile error in powerpc in the 5th patch.
* changes in the 6th patch:
  - fix the compile error by wrapping the write to tr->fops->flags with
    CONFIG_DYNAMIC_FTRACE_WITH_JMP
  - reset BPF_TRAMP_F_SKIP_FRAME when the second try of modify_fentry in
    bpf_trampoline_update()

Link: https://lore.kernel.org/bpf/20251114092450.172024-1-dongml2@chinatelecom.cn/
Changes since v1:
* change the bool parameter that we add to save_args() to "u32 flags"
* rename bpf_trampoline_need_jmp() to bpf_trampoline_use_jmp()
* add new function parameter to bpf_arch_text_poke instead of introduce
  bpf_arch_text_poke_type()
* rename bpf_text_poke to bpf_trampoline_update_fentry
* remove the BPF_TRAMP_F_JMPED and check the current mode with the origin
  flags instead.

Link: https://lore.kernel.org/bpf/CAADnVQLX54sVi1oaHrkSiLqjJaJdm3TQjoVrgU-LZimK6iDcSA@mail.gmail.com/[1]
====================

Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://patch.msgid.link/20251118123639.688444-1-dongml2@chinatelecom.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2025-11-24 09:45:27 -08:00
commit acf8726466
15 changed files with 225 additions and 65 deletions

View File

@ -2934,8 +2934,9 @@ static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
* The dummy_tramp is used to prevent another CPU from jumping to unknown
* locations during the patching process, making the patching process easier.
*/
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
int ret;
u32 old_insn;
@ -2979,14 +2980,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
!poking_bpf_entry))
return -EINVAL;
if (poke_type == BPF_MOD_CALL)
branch_type = AARCH64_INSN_BRANCH_LINK;
else
branch_type = AARCH64_INSN_BRANCH_NOLINK;
branch_type = old_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
AARCH64_INSN_BRANCH_NOLINK;
if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
return -EFAULT;
branch_type = new_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
AARCH64_INSN_BRANCH_NOLINK;
if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
return -EFAULT;

View File

@ -1284,11 +1284,12 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
return ret ? ERR_PTR(-EINVAL) : dst;
}
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
int ret;
bool is_call = (poke_type == BPF_MOD_CALL);
bool is_call;
u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
@ -1298,6 +1299,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
if (!is_bpf_text_address((unsigned long)ip))
return -ENOTSUPP;
is_call = old_t == BPF_MOD_CALL;
ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
if (ret)
return ret;
@ -1305,6 +1307,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
return -EFAULT;
is_call = new_t == BPF_MOD_CALL;
ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call);
if (ret)
return ret;

View File

@ -1107,8 +1107,9 @@ static void do_isync(void *info __maybe_unused)
* execute isync (or some CSI) so that they don't go back into the
* trampoline again.
*/
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
unsigned long bpf_func, bpf_func_end, size, offset;
ppc_inst_t old_inst, new_inst;
@ -1119,7 +1120,6 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
return -EOPNOTSUPP;
bpf_func = (unsigned long)ip;
branch_flags = poke_type == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
/* We currently only support poking bpf programs */
if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) {
@ -1132,7 +1132,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
* an unconditional branch instruction at im->ip_after_call
*/
if (offset) {
if (poke_type != BPF_MOD_JUMP) {
if (old_t == BPF_MOD_CALL || new_t == BPF_MOD_CALL) {
pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__,
bpf_func);
return -EOPNOTSUPP;
@ -1166,6 +1166,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
}
old_inst = ppc_inst(PPC_RAW_NOP());
branch_flags = old_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
if (old_addr) {
if (is_offset_in_branch_range(ip - old_addr))
create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags);
@ -1174,6 +1175,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
branch_flags);
}
new_inst = ppc_inst(PPC_RAW_NOP());
branch_flags = new_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
if (new_addr) {
if (is_offset_in_branch_range(ip - new_addr))
create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags);

View File

@ -852,17 +852,19 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx);
}
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS];
bool is_call = poke_type == BPF_MOD_CALL;
bool is_call;
int ret;
if (!is_kernel_text((unsigned long)ip) &&
!is_bpf_text_address((unsigned long)ip))
return -ENOTSUPP;
is_call = old_t == BPF_MOD_CALL;
ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
if (ret)
return ret;
@ -870,6 +872,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
if (memcmp(ip, old_insns, RV_FENTRY_NBYTES))
return -EFAULT;
is_call = new_t == BPF_MOD_CALL;
ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
if (ret)
return ret;
@ -1131,7 +1134,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
store_args(nr_arg_slots, args_off, ctx);
/* skip to actual body of traced function */
if (flags & BPF_TRAMP_F_SKIP_FRAME)
if (flags & BPF_TRAMP_F_ORIG_STACK)
orig_call += RV_FENTRY_NINSNS * 4;
if (flags & BPF_TRAMP_F_CALL_ORIG) {

View File

@ -2413,8 +2413,9 @@ bool bpf_jit_supports_far_kfunc_call(void)
return true;
}
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
@ -2431,7 +2432,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
return -EINVAL;
if (t == BPF_MOD_JUMP &&
if ((new_t == BPF_MOD_JUMP || old_t == BPF_MOD_JUMP) &&
insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
/*
* The branch already points to the destination,

View File

@ -230,6 +230,7 @@ config X86
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64
select HAVE_FTRACE_REGS_HAVING_PT_REGS if X86_64
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_JMP if X86_64
select HAVE_SAMPLE_FTRACE_DIRECT if X86_64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64
select HAVE_EBPF_JIT

View File

@ -74,7 +74,12 @@ static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* No need to translate into a callthunk. The trampoline does
* the depth accounting itself.
*/
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
if (ftrace_is_jmp(addr)) {
addr = ftrace_jmp_get(addr);
return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
} else {
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
}
}
static int ftrace_verify_code(unsigned long ip, const char *old_code)

View File

@ -285,8 +285,18 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
RET
1:
testb $1, %al
jz 2f
andq $0xfffffffffffffffe, %rax
movq %rax, MCOUNT_REG_SIZE+8(%rsp)
restore_mcount_regs
/* Restore flags */
popfq
RET
/* Swap the flags with orig_rax */
1: movq MCOUNT_REG_SIZE(%rsp), %rdi
2: movq MCOUNT_REG_SIZE(%rsp), %rdi
movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
movq %rax, MCOUNT_REG_SIZE(%rsp)

View File

@ -597,7 +597,8 @@ static int emit_jump(u8 **pprog, void *func, void *ip)
return emit_patch(pprog, func, ip, 0xE9);
}
static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t,
void *old_addr, void *new_addr)
{
const u8 *nop_insn = x86_nops[5];
@ -607,9 +608,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
int ret;
memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
if (old_addr) {
if (old_t != BPF_MOD_NOP && old_addr) {
prog = old_insn;
ret = t == BPF_MOD_CALL ?
ret = old_t == BPF_MOD_CALL ?
emit_call(&prog, old_addr, ip) :
emit_jump(&prog, old_addr, ip);
if (ret)
@ -617,9 +618,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
}
memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
if (new_addr) {
if (new_t != BPF_MOD_NOP && new_addr) {
prog = new_insn;
ret = t == BPF_MOD_CALL ?
ret = new_t == BPF_MOD_CALL ?
emit_call(&prog, new_addr, ip) :
emit_jump(&prog, new_addr, ip);
if (ret)
@ -640,8 +641,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return ret;
}
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
if (!is_kernel_text((long)ip) &&
!is_bpf_text_address((long)ip))
@ -655,7 +657,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
if (is_endbr(ip))
ip += ENDBR_INSN_SIZE;
return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
return __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
}
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
@ -897,12 +899,13 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
target = array->ptrs[poke->tail_call.key];
if (target) {
ret = __bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP, NULL,
BPF_MOD_NOP, BPF_MOD_JUMP,
NULL,
(u8 *)target->bpf_func +
poke->adj_off);
BUG_ON(ret < 0);
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
BPF_MOD_JUMP, BPF_MOD_NOP,
(u8 *)poke->tailcall_target +
X86_PATCH_SIZE, NULL);
BUG_ON(ret < 0);
@ -2847,9 +2850,10 @@ static int get_nr_used_regs(const struct btf_func_model *m)
}
static void save_args(const struct btf_func_model *m, u8 **prog,
int stack_size, bool for_call_origin)
int stack_size, bool for_call_origin, u32 flags)
{
int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0;
bool use_jmp = bpf_trampoline_use_jmp(flags);
int i, j;
/* Store function arguments to stack.
@ -2890,7 +2894,7 @@ static void save_args(const struct btf_func_model *m, u8 **prog,
*/
for (j = 0; j < arg_regs; j++) {
emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
nr_stack_slots * 8 + 0x18);
nr_stack_slots * 8 + 16 + (!use_jmp) * 8);
emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
-stack_size);
@ -3284,12 +3288,17 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* should be 16-byte aligned. Following code depend on
* that stack_size is already 8-byte aligned.
*/
stack_size += (stack_size % 16) ? 0 : 8;
if (bpf_trampoline_use_jmp(flags)) {
/* no rip in the "jmp" case */
stack_size += (stack_size % 16) ? 8 : 0;
} else {
stack_size += (stack_size % 16) ? 0 : 8;
}
}
arg_stack_off = stack_size;
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
*/
@ -3344,7 +3353,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
}
save_args(m, &prog, regs_off, false);
save_args(m, &prog, regs_off, false, flags);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
@ -3377,7 +3386,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
if (flags & BPF_TRAMP_F_CALL_ORIG) {
restore_regs(m, &prog, regs_off);
save_args(m, &prog, arg_stack_off, true);
save_args(m, &prog, arg_stack_off, true, flags);
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before calling the original function, load the
@ -3979,6 +3988,7 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old)
{
u8 *old_addr, *new_addr, *old_bypass_addr;
enum bpf_text_poke_type t;
int ret;
old_bypass_addr = old ? NULL : poke->bypass_addr;
@ -3991,21 +4001,22 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
* the kallsyms check.
*/
if (new) {
t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP,
t, BPF_MOD_JUMP,
old_addr, new_addr);
BUG_ON(ret < 0);
if (!old) {
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
BPF_MOD_JUMP, BPF_MOD_NOP,
poke->bypass_addr,
NULL);
BUG_ON(ret < 0);
}
} else {
t = old_bypass_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
BPF_MOD_JUMP,
old_bypass_addr,
t, BPF_MOD_JUMP, old_bypass_addr,
poke->bypass_addr);
BUG_ON(ret < 0);
/* let other CPUs finish the execution of program
@ -4014,9 +4025,9 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
*/
if (!ret)
synchronize_rcu();
t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_target,
BPF_MOD_JUMP,
old_addr, NULL);
t, BPF_MOD_NOP, old_addr, NULL);
BUG_ON(ret < 0);
}
}

View File

@ -1264,6 +1264,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
static inline bool bpf_trampoline_use_jmp(u64 flags)
{
return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME);
}
#else
static inline bool bpf_trampoline_use_jmp(u64 flags)
{
return false;
}
#endif
struct bpf_ksym {
unsigned long start;
unsigned long end;
@ -3698,12 +3710,14 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
#endif /* CONFIG_INET */
enum bpf_text_poke_type {
BPF_MOD_NOP,
BPF_MOD_CALL,
BPF_MOD_JUMP,
};
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr);
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old);

View File

@ -359,6 +359,7 @@ enum {
FTRACE_OPS_FL_DIRECT = BIT(17),
FTRACE_OPS_FL_SUBOP = BIT(18),
FTRACE_OPS_FL_GRAPH = BIT(19),
FTRACE_OPS_FL_JMP = BIT(20),
};
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@ -577,6 +578,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
unsigned long addr) { }
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
static inline bool ftrace_is_jmp(unsigned long addr)
{
return addr & 1;
}
static inline unsigned long ftrace_jmp_set(unsigned long addr)
{
return addr | 1UL;
}
static inline unsigned long ftrace_jmp_get(unsigned long addr)
{
return addr & ~1UL;
}
#else
static inline bool ftrace_is_jmp(unsigned long addr)
{
return false;
}
static inline unsigned long ftrace_jmp_set(unsigned long addr)
{
return addr;
}
static inline unsigned long ftrace_jmp_get(unsigned long addr)
{
return addr;
}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */
#ifdef CONFIG_STACK_TRACER
int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,

View File

@ -3150,8 +3150,9 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
return -EFAULT;
}
int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2)
int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
return -ENOTSUPP;
}

View File

@ -175,23 +175,42 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
return tr;
}
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
static int bpf_trampoline_update_fentry(struct bpf_trampoline *tr, u32 orig_flags,
void *old_addr, void *new_addr)
{
enum bpf_text_poke_type new_t = BPF_MOD_CALL, old_t = BPF_MOD_CALL;
void *ip = tr->func.addr;
if (!new_addr)
new_t = BPF_MOD_NOP;
else if (bpf_trampoline_use_jmp(tr->flags))
new_t = BPF_MOD_JUMP;
if (!old_addr)
old_t = BPF_MOD_NOP;
else if (bpf_trampoline_use_jmp(orig_flags))
old_t = BPF_MOD_JUMP;
return bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
}
static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
void *old_addr)
{
int ret;
if (tr->func.ftrace_managed)
ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
else
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
return ret;
}
static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
void *old_addr, void *new_addr,
bool lock_direct_mutex)
{
void *ip = tr->func.addr;
int ret;
if (tr->func.ftrace_managed) {
@ -200,7 +219,8 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad
else
ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
} else {
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
new_addr);
}
return ret;
}
@ -225,7 +245,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
ret = register_ftrace_direct(tr->fops, (long)new_addr);
} else {
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
}
return ret;
@ -336,8 +356,9 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im)
* call_rcu_tasks() is not necessary.
*/
if (im->ip_after_call) {
int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
NULL, im->ip_epilogue);
int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_NOP,
BPF_MOD_JUMP, NULL,
im->ip_epilogue);
WARN_ON(err);
if (IS_ENABLED(CONFIG_TASKS_RCU))
call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
@ -410,7 +431,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
return PTR_ERR(tlinks);
if (total == 0) {
err = unregister_fentry(tr, tr->cur_image->image);
err = unregister_fentry(tr, orig_flags, tr->cur_image->image);
bpf_tramp_image_put(tr->cur_image);
tr->cur_image = NULL;
goto out;
@ -434,9 +455,20 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
again:
if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
(tr->flags & BPF_TRAMP_F_CALL_ORIG))
tr->flags |= BPF_TRAMP_F_ORIG_STACK;
if (tr->flags & BPF_TRAMP_F_CALL_ORIG) {
if (tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) {
/* The BPF_TRAMP_F_SKIP_FRAME can be cleared in the
* first try, reset it in the second try.
*/
tr->flags |= BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SKIP_FRAME;
} else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_JMP)) {
/* Use "jmp" instead of "call" for the trampoline
* in the origin call case, and we don't need to
* skip the frame.
*/
tr->flags &= ~BPF_TRAMP_F_SKIP_FRAME;
}
}
#endif
size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
@ -467,10 +499,18 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
if (err)
goto out_free;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
if (bpf_trampoline_use_jmp(tr->flags))
tr->fops->flags |= FTRACE_OPS_FL_JMP;
else
tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
#endif
WARN_ON(tr->cur_image && total == 0);
if (tr->cur_image)
/* progs already running at this address */
err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
err = modify_fentry(tr, orig_flags, tr->cur_image->image,
im->image, lock_direct_mutex);
else
/* first time registering */
err = register_fentry(tr, im->image);
@ -493,8 +533,15 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
tr->cur_image = im;
out:
/* If any error happens, restore previous flags */
if (err)
if (err) {
tr->flags = orig_flags;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
if (bpf_trampoline_use_jmp(tr->flags))
tr->fops->flags |= FTRACE_OPS_FL_JMP;
else
tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
#endif
}
kfree(tlinks);
return err;
@ -570,7 +617,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
if (err)
return err;
tr->extension_prog = link->link.prog;
return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
return bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP,
BPF_MOD_JUMP, NULL,
link->link.prog->bpf_func);
}
if (cnt >= BPF_MAX_TRAMP_LINKS)
@ -618,6 +666,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
if (kind == BPF_TRAMP_REPLACE) {
WARN_ON_ONCE(!tr->extension_prog);
err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
BPF_MOD_NOP,
tr->extension_prog->bpf_func, NULL);
tr->extension_prog = NULL;
guard(mutex)(&tgt_prog->aux->ext_mutex);

View File

@ -80,6 +80,12 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
If the architecture generates __patchable_function_entries sections
but does not want them included in the ftrace locations.
config HAVE_DYNAMIC_FTRACE_WITH_JMP
bool
help
If the architecture supports to replace the __fentry__ with a
"jmp" instruction.
config HAVE_SYSCALL_TRACEPOINTS
bool
help
@ -330,6 +336,12 @@ config DYNAMIC_FTRACE_WITH_ARGS
depends on DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
config DYNAMIC_FTRACE_WITH_JMP
def_bool y
depends on DYNAMIC_FTRACE
depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
depends on HAVE_DYNAMIC_FTRACE_WITH_JMP
config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC

View File

@ -5951,7 +5951,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
for (i = 0; i < size; i++) {
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
del = __ftrace_lookup_ip(direct_functions, entry->ip);
if (del && del->direct == addr) {
if (del && ftrace_jmp_get(del->direct) ==
ftrace_jmp_get(addr)) {
remove_hash_entry(direct_functions, del);
kfree(del);
}
@ -6016,8 +6017,15 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
if (ftrace_hash_empty(hash))
return -EINVAL;
/* This is a "raw" address, and this should never happen. */
if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
return -EINVAL;
mutex_lock(&direct_mutex);
if (ops->flags & FTRACE_OPS_FL_JMP)
addr = ftrace_jmp_set(addr);
/* Make sure requested entries are not already registered.. */
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
@ -6138,6 +6146,13 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
lockdep_assert_held_once(&direct_mutex);
/* This is a "raw" address, and this should never happen. */
if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
return -EINVAL;
if (ops->flags & FTRACE_OPS_FL_JMP)
addr = ftrace_jmp_set(addr);
/* Enable the tmp_ops to have the same functions as the direct ops */
ftrace_ops_init(&tmp_ops);
tmp_ops.func_hash = ops->func_hash;