From 571fa247ab411f3233eeaaf837c6e646a513b9f8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 28 Sep 2021 11:16:08 +0530 Subject: [PATCH 1/7] samples: bpf: Fix vmlinux.h generation for XDP samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generate vmlinux.h only from the in-tree vmlinux, and remove enum declarations that would cause a build failure in case of version mismatches. There are now two options when building the samples: 1. Compile the kernel to use in-tree vmlinux for vmlinux.h 2. Override VMLINUX_BTF for samples using something like this: make VMLINUX_BTF=/sys/kernel/btf/vmlinux -C samples/bpf This change was tested with relative builds, e.g. cases like: * make O=build -C samples/bpf * make KBUILD_OUTPUT=build -C samples/bpf * make -C samples/bpf * cd samples/bpf && make When a suitable VMLINUX_BTF is not found, the following message is printed: /home/kkd/src/linux/samples/bpf/Makefile:333: *** Cannot find a vmlinux for VMLINUX_BTF at any of " ./vmlinux", build the kernel or set VMLINUX_BTF variable. Stop. Fixes: 384b6b3bbf0d (samples: bpf: Add vmlinux.h generation support) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210928054608.1799021-1-memxor@gmail.com --- samples/bpf/Makefile | 17 ++++++++--------- samples/bpf/xdp_redirect_map_multi.bpf.c | 5 ----- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4dc20be5fb96..5fd48a8d4f10 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -322,17 +322,11 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h -include $(BPF_SAMPLES_PATH)/Makefile.target -VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ - $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ - ../../../../vmlinux \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ + $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ + $(abspath ./vmlinux) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -ifeq ($(VMLINUX_BTF),) -$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") -endif - $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) ifeq ($(VMLINUX_H),) $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ @@ -340,6 +334,11 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif +ifeq ($(VMLINUX_BTF),) + $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ + build the kernel or set VMLINUX_BTF variable) +endif + clean-files += vmlinux.h # Get Clang's default includes on this system, as opposed to those seen by diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c index 8f59d430cb64..bb0a5a3bfcf0 100644 --- a/samples/bpf/xdp_redirect_map_multi.bpf.c +++ b/samples/bpf/xdp_redirect_map_multi.bpf.c @@ -5,11 +5,6 @@ #include "xdp_sample.bpf.h" #include "xdp_sample_shared.h" -enum { - BPF_F_BROADCAST = (1ULL << 3), - BPF_F_EXCLUDE_INGRESS = (1ULL << 4), -}; - struct { __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); __uint(key_size, sizeof(int)); From d75fe9cb1dd062684c9fb8a4581738170365dc06 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 23 Sep 2021 01:05:40 +0100 Subject: [PATCH 2/7] samples/bpf: Relicense bpf_insn.h as GPL-2.0-only OR BSD-2-Clause MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit libbpf and bpftool have been dual-licensed to facilitate inclusion in software that is not compatible with GPL2-only (ie: Apache2), but the samples are still GPL2-only. Given these files are samples, they get naturally copied around. For example, it is the case for samples/bpf/bpf_insn.h which was copied into the systemd tree: https://github.com/systemd/systemd/blob/main/src/shared/linux/bpf_insn.h Some more context on systemd's needs specifically: Most of systemd is (L)GPL2-or-later, which means there is no perceived incompatibility with Apache2 software and can thus be linked with OpenSSL 3.0. But given this GPL2-only header is included this is currently not possible. Dual-licensing this header solves this problem for us as we are scoping a move to OpenSSL 3.0, see: https://lists.freedesktop.org/archives/systemd-devel/2021-September/046882.html Dual-license this header as GPL-2.0-only OR BSD-2-Clause to follow the same licensing used by libbpf and bpftool: 1bc38b8ff6cc ("libbpf: relicense libbpf as LGPL-2.1 OR BSD-2-Clause") 907b22365115 ("tools: bpftool: dual license all files") Signed-off-by: Luca Boccassi Signed-off-by: Daniel Borkmann Acked-by: Simon Horman Acked-by: Daniel Mack Acked-by: Josef Bacik Acked-by: Joe Stringer Acked-by: Chenbo Feng Acked-by: Björn Töpel Acked-by: Magnus Karlsson Acked-by: Brendan Jackman Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210923000540.47344-1-luca.boccassi@gmail.com --- samples/bpf/bpf_insn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h index aee04534483a..29c3bb6ad1cd 100644 --- a/samples/bpf/bpf_insn.h +++ b/samples/bpf/bpf_insn.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* eBPF instruction mini library */ #ifndef __BPF_INSN_H #define __BPF_INSN_H From 79e3445b38e0cab94264a3894c0c3d57c930b97e Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 28 Sep 2021 11:13:10 +0200 Subject: [PATCH 3/7] bpf, arm: Fix register clobbering in div/mod implementation On ARM CPUs that lack div/mod instructions, ALU32 BPF_DIV and BPF_MOD are implemented using a call to a helper function. Before, the emitted code for those function calls failed to preserve caller-saved ARM registers. Since some of those registers happen to be mapped to BPF registers, it resulted in eBPF register values being overwritten. This patch emits code to push and pop the remaining caller-saved ARM registers r2-r3 into the stack during the div/mod function call. ARM registers r0-r1 are used as arguments and return value, and those were already saved and restored correctly. Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a951276f0547..a903b26cde40 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -36,6 +36,10 @@ * +-----+ * |RSVD | JIT scratchpad * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | ... | caller-saved registers + * +-----+ + * | ... | arguments passed on stack + * ARM_SP during call => +-----| * | | * | ... | Function call stack * | | @@ -63,6 +67,12 @@ * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. + * + * Some eBPF operations are implemented via a call to a helper function. + * Such calls are "invisible" in the eBPF code, so it is up to the calling + * program to preserve any caller-saved ARM registers during the call. The + * JIT emits code to push and pop those registers onto the stack, immediately + * above the callee stack frame. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ @@ -70,6 +80,8 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) +#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3) + enum { /* Stack layout - these are offsets from (top of stack - 4) */ BPF_R2_HI, @@ -464,6 +476,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { + const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 @@ -495,11 +508,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, rm), ctx); } + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); + /* Call appropriate function */ emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); + /* Restore caller-saved registers from stack */ + emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx); + /* Save return value */ if (rd != ARM_R0) emit(ARM_MOV_R(rd, ARM_R0), ctx); From 30e29a9a2bc6a4888335a6ede968b75cd329657a Mon Sep 17 00:00:00 2001 From: Tatsuhiko Yasumatsu Date: Thu, 30 Sep 2021 22:55:45 +0900 Subject: [PATCH 4/7] bpf: Fix integer overflow in prealloc_elems_and_freelist() In prealloc_elems_and_freelist(), the multiplication to calculate the size passed to bpf_map_area_alloc() could lead to an integer overflow. As a result, out-of-bounds write could occur in pcpu_freelist_populate() as reported by KASAN: [...] [ 16.968613] BUG: KASAN: slab-out-of-bounds in pcpu_freelist_populate+0xd9/0x100 [ 16.969408] Write of size 8 at addr ffff888104fc6ea0 by task crash/78 [ 16.970038] [ 16.970195] CPU: 0 PID: 78 Comm: crash Not tainted 5.15.0-rc2+ #1 [ 16.970878] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 16.972026] Call Trace: [ 16.972306] dump_stack_lvl+0x34/0x44 [ 16.972687] print_address_description.constprop.0+0x21/0x140 [ 16.973297] ? pcpu_freelist_populate+0xd9/0x100 [ 16.973777] ? pcpu_freelist_populate+0xd9/0x100 [ 16.974257] kasan_report.cold+0x7f/0x11b [ 16.974681] ? pcpu_freelist_populate+0xd9/0x100 [ 16.975190] pcpu_freelist_populate+0xd9/0x100 [ 16.975669] stack_map_alloc+0x209/0x2a0 [ 16.976106] __sys_bpf+0xd83/0x2ce0 [...] The possibility of this overflow was originally discussed in [0], but was overlooked. Fix the integer overflow by changing elem_size to u64 from u32. [0] https://lore.kernel.org/bpf/728b238e-a481-eb50-98e9-b0f430ab01e7@gmail.com/ Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Signed-off-by: Tatsuhiko Yasumatsu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210930135545.173698-1-th.yasumatsu@gmail.com --- kernel/bpf/stackmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 09a3fd97d329..6e75bbee39f0 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -63,7 +63,8 @@ static inline int stack_map_data_size(struct bpf_map *map) static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) { - u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; + u64 elem_size = sizeof(struct stack_map_bucket) + + (u64)smap->map.value_size; int err; smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, From 4729445b47efebf089da4ccbcd1b116ffa2ad4af Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 30 Sep 2021 11:46:34 +0530 Subject: [PATCH 5/7] libbpf: Fix segfault in light skeleton for objects without BTF When fed an empty BPF object, bpftool gen skeleton -L crashes at btf__set_fd() since it assumes presence of obj->btf, however for the sequence below clang adds no .BTF section (hence no BTF). Reproducer: $ touch a.bpf.c $ clang -O2 -g -target bpf -c a.bpf.c $ bpftool gen skeleton -L a.bpf.o /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* THIS FILE IS AUTOGENERATED! */ struct a_bpf { struct bpf_loader_ctx ctx; Segmentation fault (core dumped) The same occurs for files compiled without BTF info, i.e. without clang's -g flag. Fixes: 67234743736a (libbpf: Generate loader program out of BPF ELF file.) Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210930061634.1840768-1-memxor@gmail.com --- tools/lib/bpf/libbpf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 88d8825fc6f6..e4f83c304ec9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6894,7 +6894,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) if (obj->gen_loader) { /* reset FDs */ - btf__set_fd(obj->btf, -1); + if (obj->btf) + btf__set_fd(obj->btf, -1); for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) From b0e875bac0fab3e7a7431c2eee36a8ccc0c712ac Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 1 Oct 2021 11:59:10 -0700 Subject: [PATCH 6/7] libbpf: Fix memory leak in strset Free struct strset itself, not just its internal parts. Fixes: 90d76d3ececc ("libbpf: Extract internal set-of-strings datastructure APIs") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20211001185910.86492-1-andrii@kernel.org --- tools/lib/bpf/strset.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index 1fb8b49de1d6..ea655318153f 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -88,6 +88,7 @@ void strset__free(struct strset *set) hashmap__free(set->strs_hash); free(set->strs_data); + free(set); } size_t strset__data_size(const struct strset *set) From d0c6416bd7091647f6041599f396bfa19ae30368 Mon Sep 17 00:00:00 2001 From: Jiang Wang Date: Mon, 4 Oct 2021 23:25:28 +0000 Subject: [PATCH 7/7] unix: Fix an issue in unix_shutdown causing the other end read/write failures Commit 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") sets unix domain socket peer state to TCP_CLOSE in unix_shutdown. This could happen when the local end is shutdown but the other end is not. Then, the other end will get read or write failures which is not expected. Fix the issue by setting the local state to shutdown. Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Reported-by: Casey Schaufler Suggested-by: Cong Wang Signed-off-by: Jiang Wang Signed-off-by: Daniel Borkmann Tested-by: Casey Schaufler Reviewed-by: Casey Schaufler Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211004232530.2377085-1-jiang.wang@bytedance.com --- net/unix/af_unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f505b89bda6a..915afcae6a12 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2860,6 +2860,9 @@ static int unix_shutdown(struct socket *sock, int mode) unix_state_lock(sk); sk->sk_shutdown |= mode; + if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && + mode == SHUTDOWN_MASK) + sk->sk_state = TCP_CLOSE; other = unix_peer(sk); if (other) sock_hold(other); @@ -2882,12 +2885,10 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_shutdown |= peer_mode; unix_state_unlock(other); other->sk_state_change(other); - if (peer_mode == SHUTDOWN_MASK) { + if (peer_mode == SHUTDOWN_MASK) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); - other->sk_state = TCP_CLOSE; - } else if (peer_mode & RCV_SHUTDOWN) { + else if (peer_mode & RCV_SHUTDOWN) sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - } } if (other) sock_put(other);