From 7bdbf7446305cb65c510c16d57cde82bc76b234a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Apr 2024 19:13:02 -0700 Subject: [PATCH 1/4] bpf: add special internal-only MOV instruction to resolve per-CPU addrs Add a new BPF instruction for resolving absolute addresses of per-CPU data from their per-CPU offsets. This instruction is internal-only and users are not allowed to use them directly. They will only be used for internal inlining optimizations for now between BPF verifier and BPF JITs. We use a special BPF_MOV | BPF_ALU64 | BPF_X form with insn->off field set to BPF_ADDR_PERCPU = -1. I used negative offset value to distinguish them from positive ones used by user-exposed instructions. Such instruction performs a resolution of a per-CPU offset stored in a register to a valid kernel address which can be dereferenced. It is useful in any use case where absolute address of a per-CPU data has to be resolved (e.g., in inlining bpf_map_lookup_elem()). BPF disassembler is also taught to recognize them to support dumping final BPF assembly code (non-JIT'ed version). Add arch-specific way for BPF JITs to mark support for this instructions. This patch also adds support for these instructions in x86-64 BPF JIT. Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240402021307.1012571-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 16 ++++++++++++++++ include/linux/filter.h | 20 ++++++++++++++++++++ kernel/bpf/core.c | 5 +++++ kernel/bpf/disasm.c | 14 ++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3b639d6f2f54..af89dd117dce 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1382,6 +1382,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image maybe_emit_mod(&prog, AUX_REG, dst_reg, true); EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg)); break; + } else if (insn_is_mov_percpu_addr(insn)) { + u32 off = (u32)(unsigned long)&this_cpu_off; + + /* mov , (if necessary) */ + EMIT_mov(dst_reg, src_reg); + + /* add , gs:[] */ + EMIT2(0x65, add_1mod(0x48, dst_reg)); + EMIT3(0x03, add_1reg(0x04, dst_reg), 0x25); + EMIT(off, 4); + break; } fallthrough; case BPF_ALU | BPF_MOV | BPF_X: @@ -3365,6 +3376,11 @@ bool bpf_jit_supports_subprog_tailcalls(void) return true; } +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + void bpf_jit_free(struct bpf_prog *prog) { if (prog->jited) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 531b36090122..161d5f7b64ed 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -178,6 +178,25 @@ struct ctl_table_header; .off = 0, \ .imm = 0 }) +/* Special (internal-only) form of mov, used to resolve per-CPU addrs: + * dst_reg = src_reg + + * BPF_ADDR_PERCPU is used as a special insn->off value. + */ +#define BPF_ADDR_PERCPU (-1) + +#define BPF_MOV64_PERCPU_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = BPF_ADDR_PERCPU, \ + .imm = 0 }) + +static inline bool insn_is_mov_percpu_addr(const struct bpf_insn *insn) +{ + return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU; +} + /* Short form of mov, dst_reg = imm32 */ #define BPF_MOV64_IMM(DST, IMM) \ @@ -972,6 +991,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); +bool bpf_jit_supports_percpu_insn(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_exceptions(void); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ae406a2814db..7a33a3a7e63c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2945,6 +2945,11 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void) return false; } +bool __weak bpf_jit_supports_percpu_insn(void) +{ + return false; +} + bool __weak bpf_jit_supports_kfunc_call(void) { return false; diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index bd2e2dd04740..309c4aa1b026 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -172,6 +172,17 @@ static bool is_addr_space_cast(const struct bpf_insn *insn) insn->off == BPF_ADDR_SPACE_CAST; } +/* Special (internal-only) form of mov, used to resolve per-CPU addrs: + * dst_reg = src_reg + + * BPF_ADDR_PERCPU is used as a special insn->off value. + */ +#define BPF_ADDR_PERCPU (-1) + +static inline bool is_mov_percpu_addr(const struct bpf_insn *insn) +{ + return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU; +} + void print_bpf_insn(const struct bpf_insn_cbs *cbs, const struct bpf_insn *insn, bool allow_ptr_leaks) @@ -194,6 +205,9 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n", insn->code, insn->dst_reg, insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm); + } else if (is_mov_percpu_addr(insn)) { + verbose(cbs->private_data, "(%02x) r%d = &(void __percpu *)(r%d)\n", + insn->code, insn->dst_reg, insn->src_reg); } else if (BPF_SRC(insn->code) == BPF_X) { verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n", insn->code, class == BPF_ALU ? 'w' : 'r', From 1ae6921009e5d72787e07ccc04754514ccf6bc99 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Apr 2024 19:13:03 -0700 Subject: [PATCH 2/4] bpf: inline bpf_get_smp_processor_id() helper If BPF JIT supports per-CPU MOV instruction, inline bpf_get_smp_processor_id() to eliminate unnecessary function calls. Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240402021307.1012571-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fcb62300f407..17c06f1505e4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20074,6 +20074,30 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto next_insn; } +#ifdef CONFIG_X86_64 + /* Implement bpf_get_smp_processor_id() inline. */ + if (insn->imm == BPF_FUNC_get_smp_processor_id && + prog->jit_requested && bpf_jit_supports_percpu_insn()) { + /* BPF_FUNC_get_smp_processor_id inlining is an + * optimization, so if pcpu_hot.cpu_number is ever + * changed in some incompatible and hard to support + * way, it's fine to back out this inlining logic + */ + insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number); + insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); + insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0); + cnt = 3; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } +#endif /* Implement bpf_get_func_arg inline. */ if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_arg) { From db69718b8efac802c7cc20d5a6c7dfc913f99c43 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Apr 2024 19:13:04 -0700 Subject: [PATCH 3/4] bpf: inline bpf_map_lookup_elem() for PERCPU_ARRAY maps Using new per-CPU BPF instruction implement inlining for per-CPU ARRAY map lookup helper, if BPF JIT support is present. Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240402021307.1012571-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 13358675ff2e..8c1e6d7654bb 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -246,6 +246,38 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) return this_cpu_ptr(array->pptrs[index & array->index_mask]); } +/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */ +static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_insn *insn = insn_buf; + + if (!bpf_jit_supports_percpu_insn()) + return -EOPNOTSUPP; + + if (map->map_flags & BPF_F_INNER_MAP) + return -EOPNOTSUPP; + + BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0); + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs)); + + *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0); + if (!map->bypass_spec_v1) { + *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6); + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask); + } else { + *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5); + } + + *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); + *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); + *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0); + return insn - insn_buf; +} + static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -776,6 +808,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = percpu_array_map_lookup_elem, + .map_gen_lookup = percpu_array_map_gen_lookup, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, From 0b56e637f705836b9aa51e2b1058c3c814c121a8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Apr 2024 19:13:05 -0700 Subject: [PATCH 4/4] bpf: inline bpf_map_lookup_elem() helper for PERCPU_HASH map Using new per-CPU BPF instruction, partially inline bpf_map_lookup_elem() helper for per-CPU hashmap BPF map. Just like for normal HASH map, we still generate a call into __htab_map_lookup_elem(), but after that we resolve per-CPU element address using a new instruction, saving on extra functions calls. Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240402021307.1012571-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/hashtab.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index e81059faae63..83a9a74260e9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2308,6 +2308,26 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +/* inline bpf_map_lookup_elem() call for per-CPU hashmap */ +static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + if (!bpf_jit_supports_percpu_insn()) + return -EOPNOTSUPP; + + BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, + (void *(*)(struct bpf_map *map, void *key))NULL)); + *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3); + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, + offsetof(struct htab_elem, key) + map->key_size); + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); + *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); + + return insn - insn_buf; +} + static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) { struct htab_elem *l; @@ -2436,6 +2456,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_percpu_map_lookup_elem, + .map_gen_lookup = htab_percpu_map_gen_lookup, .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_percpu_map_update_elem, .map_delete_elem = htab_map_delete_elem,