From c5815ac7e2aaff4f00b2b9e21d84b9f2fddddb48 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:04 +0000 Subject: [PATCH 1/7] samples: bpf: Refactor hbm program with libbpf This commit refactors the existing cgroup programs with libbpf bpf loader. Since bpf_program__attach doesn't support cgroup program attachment, this explicitly attaches cgroup bpf program with bpf_program__attach_cgroup(bpf_prog, cg1). Also, to change attach_type of bpf program, this uses libbpf's bpf_program__set_expected_attach_type helper to switch EGRESS to INGRESS. To keep bpf program attached to the cgroup hierarchy even after the exit, this commit uses the BPF_LINK_PINNING to pin the link attachment even after it is closed. Besides, this program was broken due to the typo of BPF MAP definition. But this commit solves the problem by fixing this from 'queue_stats' map struct hvm_queue_stats -> hbm_queue_stats. Fixes: 36b5d471135c ("selftests/bpf: samples/bpf: Split off legacy stuff from bpf_helpers.h") Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-2-danieltimlee@gmail.com --- samples/bpf/.gitignore | 3 + samples/bpf/Makefile | 2 +- samples/bpf/do_hbm_test.sh | 32 +++++------ samples/bpf/hbm.c | 113 ++++++++++++++++++++----------------- samples/bpf/hbm_kern.h | 2 +- 5 files changed, 79 insertions(+), 73 deletions(-) diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index b2f29bc8dc43..0b9548ea8477 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -52,3 +52,6 @@ xdp_tx_iptunnel xdpsock xsk_fwd testfile.img +hbm_out.log +iperf.* +*.out diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index aeebf5d12f32..7c61118525f7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -110,7 +110,7 @@ xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) -hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) +hbm-objs := hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs always-y := $(tprogs-y) diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh index ffe4c0607341..21790ea5c460 100755 --- a/samples/bpf/do_hbm_test.sh +++ b/samples/bpf/do_hbm_test.sh @@ -91,6 +91,16 @@ qdisc="" flags="" do_stats=0 +BPFFS=/sys/fs/bpf +function config_bpffs () { + if mount | grep $BPFFS > /dev/null; then + echo "bpffs already mounted" + else + echo "bpffs not mounted. Mounting..." + mount -t bpf none $BPFFS + fi +} + function start_hbm () { rm -f hbm.out echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out @@ -192,6 +202,7 @@ processArgs () { } processArgs +config_bpffs if [ $debug_flag -eq 1 ] ; then rm -f hbm_out.log @@ -201,7 +212,7 @@ hbm_pid=$(start_hbm) usleep 100000 host=`hostname` -cg_base_dir=/sys/fs/cgroup +cg_base_dir=/sys/fs/cgroup/unified cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id" echo $$ >> $cg_dir/cgroup.procs @@ -411,23 +422,8 @@ fi sleep 1 -# Detach any BPF programs that may have lingered -ttx=`bpftool cgroup tree | grep hbm` -v=2 -for x in $ttx ; do - if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then - cg=$x ; v=0 - else - if [ $v -eq 0 ] ; then - id=$x ; v=1 - else - if [ $v -eq 1 ] ; then - type=$x ; bpftool cgroup detach $cg $type id $id - v=0 - fi - fi - fi -done +# Detach any pinned BPF programs that may have lingered +rm -rf $BPFFS/hbm* if [ $use_netperf -ne 0 ] ; then if [ "$server" == "" ] ; then diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 400e741a56eb..b0c18efe7928 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -46,7 +46,6 @@ #include #include -#include "bpf_load.h" #include "bpf_rlimit.h" #include "cgroup_helpers.h" #include "hbm.h" @@ -70,9 +69,9 @@ static void do_error(char *msg, bool errno_flag); #define DEBUGFS "/sys/kernel/debug/tracing/" -struct bpf_object *obj; -int bpfprog_fd; -int cgroup_storage_fd; +static struct bpf_program *bpf_prog; +static struct bpf_object *obj; +static int queue_stats_fd; static void read_trace_pipe2(void) { @@ -121,56 +120,50 @@ static void do_error(char *msg, bool errno_flag) static int prog_load(char *prog) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .file = prog, - .expected_attach_type = BPF_CGROUP_INET_EGRESS, - }; - int map_fd; - struct bpf_map *map; - - int ret = 0; - - if (access(prog, O_RDONLY) < 0) { - printf("Error accessing file %s: %s\n", prog, strerror(errno)); + obj = bpf_object__open_file(prog, NULL); + if (libbpf_get_error(obj)) { + printf("ERROR: opening BPF object file failed\n"); return 1; } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd)) - ret = 1; - if (!ret) { - map = bpf_object__find_map_by_name(obj, "queue_stats"); - map_fd = bpf_map__fd(map); - if (map_fd < 0) { - printf("Map not found: %s\n", strerror(map_fd)); - ret = 1; - } + + /* load BPF program */ + if (bpf_object__load(obj)) { + printf("ERROR: loading BPF object file failed\n"); + goto err; } - if (ret) { - printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog); - printf(" Output from verifier:\n%s\n------\n", bpf_log_buf); - ret = -1; - } else { - ret = map_fd; + bpf_prog = bpf_object__find_program_by_title(obj, "cgroup_skb/egress"); + if (!bpf_prog) { + printf("ERROR: finding a prog in obj file failed\n"); + goto err; } - return ret; + queue_stats_fd = bpf_object__find_map_fd_by_name(obj, "queue_stats"); + if (queue_stats_fd < 0) { + printf("ERROR: finding a map in obj file failed\n"); + goto err; + } + + return 0; + +err: + bpf_object__close(obj); + return 1; } static int run_bpf_prog(char *prog, int cg_id) { - int map_fd; - int rc = 0; + struct hbm_queue_stats qstats = {0}; + char cg_dir[100], cg_pin_path[100]; + struct bpf_link *link = NULL; int key = 0; int cg1 = 0; - int type = BPF_CGROUP_INET_EGRESS; - char cg_dir[100]; - struct hbm_queue_stats qstats = {0}; + int rc = 0; sprintf(cg_dir, "/hbm%d", cg_id); - map_fd = prog_load(prog); - if (map_fd == -1) - return 1; + rc = prog_load(prog); + if (rc != 0) + return rc; if (setup_cgroup_environment()) { printf("ERROR: setting cgroup environment\n"); @@ -190,16 +183,24 @@ static int run_bpf_prog(char *prog, int cg_id) qstats.stats = stats_flag ? 1 : 0; qstats.loopback = loopback_flag ? 1 : 0; qstats.no_cn = no_cn_flag ? 1 : 0; - if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) { + if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) { printf("ERROR: Could not update map element\n"); goto err; } if (!outFlag) - type = BPF_CGROUP_INET_INGRESS; - if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) { - printf("ERROR: bpf_prog_attach fails!\n"); - log_err("Attaching prog"); + bpf_program__set_expected_attach_type(bpf_prog, BPF_CGROUP_INET_INGRESS); + + link = bpf_program__attach_cgroup(bpf_prog, cg1); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach_cgroup failed\n"); + goto err; + } + + sprintf(cg_pin_path, "/sys/fs/bpf/hbm%d", cg_id); + rc = bpf_link__pin(link, cg_pin_path); + if (rc < 0) { + printf("ERROR: bpf_link__pin failed: %d\n", rc); goto err; } @@ -213,7 +214,7 @@ static int run_bpf_prog(char *prog, int cg_id) #define DELTA_RATE_CHECK 10000 /* in us */ #define RATE_THRESHOLD 9500000000 /* 9.5 Gbps */ - bpf_map_lookup_elem(map_fd, &key, &qstats); + bpf_map_lookup_elem(queue_stats_fd, &key, &qstats); if (gettimeofday(&t0, NULL) < 0) do_error("gettimeofday failed", true); t_last = t0; @@ -242,7 +243,7 @@ static int run_bpf_prog(char *prog, int cg_id) fclose(fin); printf(" new_eth_tx_bytes:%llu\n", new_eth_tx_bytes); - bpf_map_lookup_elem(map_fd, &key, &qstats); + bpf_map_lookup_elem(queue_stats_fd, &key, &qstats); new_cg_tx_bytes = qstats.bytes_total; delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes; last_eth_tx_bytes = new_eth_tx_bytes; @@ -289,14 +290,14 @@ static int run_bpf_prog(char *prog, int cg_id) rate = minRate; qstats.rate = rate; } - if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) + if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) do_error("update map element fails", false); } } else { sleep(dur); } // Get stats! - if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) { + if (stats_flag && bpf_map_lookup_elem(queue_stats_fd, &key, &qstats)) { char fname[100]; FILE *fout; @@ -394,14 +395,20 @@ static int run_bpf_prog(char *prog, int cg_id) if (debugFlag) read_trace_pipe2(); - return rc; + goto cleanup; + err: rc = 1; - if (cg1) - close(cg1); - cleanup_cgroup_environment(); +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); + if (cg1 != -1) + close(cg1); + + if (rc != 0) + cleanup_cgroup_environment(); return rc; } diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h index e00f26f6afba..722b3fadb467 100644 --- a/samples/bpf/hbm_kern.h +++ b/samples/bpf/hbm_kern.h @@ -69,7 +69,7 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); __type(key, u32); - __type(value, struct hvm_queue_stats); + __type(value, struct hbm_queue_stats); } queue_stats SEC(".maps"); struct hbm_pkt_info { From d89af13c92056c46dfc4bcb3d90efe88937c3381 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:05 +0000 Subject: [PATCH 2/7] samples: bpf: Refactor test_cgrp2_sock2 program with libbpf This commit refactors the existing cgroup program with libbpf bpf loader. The original test_cgrp2_sock2 has keeped the bpf program attached to the cgroup hierarchy even after the exit of user program. To implement the same functionality with libbpf, this commit uses the BPF_LINK_PINNING to pin the link attachment even after it is closed. Since this uses LINK instead of ATTACH, detach of bpf program from cgroup with 'test_cgrp2_sock' is not used anymore. The code to mount the bpf was added to the .sh file in case the bpff was not mounted on /sys/fs/bpf. Additionally, to fix the problem that shell script cannot find the binary object from the current path, relative path './' has been added in front of binary. Fixes: 554ae6e792ef3 ("samples/bpf: add userspace example for prohibiting sockets") Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-3-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/test_cgrp2_sock2.c | 69 +++++++++++++++++++++++---------- samples/bpf/test_cgrp2_sock2.sh | 21 ++++++++-- 3 files changed, 66 insertions(+), 26 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 7c61118525f7..d31e082c369e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -82,7 +82,7 @@ test_overhead-objs := bpf_load.o test_overhead_user.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_attach-objs := test_cgrp2_attach.o test_cgrp2_sock-objs := test_cgrp2_sock.o -test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o +test_cgrp2_sock2-objs := test_cgrp2_sock2.o xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c index a9277b118c33..e7060aaa2f5a 100644 --- a/samples/bpf/test_cgrp2_sock2.c +++ b/samples/bpf/test_cgrp2_sock2.c @@ -20,9 +20,9 @@ #include #include #include +#include #include "bpf_insn.h" -#include "bpf_load.h" static int usage(const char *argv0) { @@ -32,37 +32,64 @@ static int usage(const char *argv0) int main(int argc, char **argv) { - int cg_fd, ret, filter_id = 0; + int cg_fd, err, ret = EXIT_FAILURE, filter_id = 0, prog_cnt = 0; + const char *link_pin_path = "/sys/fs/bpf/test_cgrp2_sock2"; + struct bpf_link *link = NULL; + struct bpf_program *progs[2]; + struct bpf_program *prog; + struct bpf_object *obj; if (argc < 3) return usage(argv[0]); - cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); - if (cg_fd < 0) { - printf("Failed to open cgroup path: '%s'\n", strerror(errno)); - return EXIT_FAILURE; - } - - if (load_bpf_file(argv[2])) - return EXIT_FAILURE; - - printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); - if (argc > 3) filter_id = atoi(argv[3]); + cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + return ret; + } + + obj = bpf_object__open_file(argv[2], NULL); + if (libbpf_get_error(obj)) { + printf("ERROR: opening BPF object file failed\n"); + return ret; + } + + bpf_object__for_each_program(prog, obj) { + progs[prog_cnt] = prog; + prog_cnt++; + } + if (filter_id >= prog_cnt) { printf("Invalid program id; program not found in file\n"); - return EXIT_FAILURE; + goto cleanup; } - ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, - BPF_CGROUP_INET_SOCK_CREATE, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; + /* load BPF program */ + if (bpf_object__load(obj)) { + printf("ERROR: loading BPF object file failed\n"); + goto cleanup; } - return EXIT_SUCCESS; + link = bpf_program__attach_cgroup(progs[filter_id], cg_fd); + if (libbpf_get_error(link)) { + printf("ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; + } + + err = bpf_link__pin(link, link_pin_path); + if (err < 0) { + printf("ERROR: bpf_link__pin failed: %d\n", err); + goto cleanup; + } + + ret = EXIT_SUCCESS; + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); + return ret; } diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh index 0f396a86e0cb..6a3dbe642b2b 100755 --- a/samples/bpf/test_cgrp2_sock2.sh +++ b/samples/bpf/test_cgrp2_sock2.sh @@ -1,6 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +BPFFS=/sys/fs/bpf +LINK_PIN=$BPFFS/test_cgrp2_sock2 + function config_device { ip netns add at_ns0 ip link add veth0 type veth peer name veth0b @@ -21,16 +24,22 @@ function config_cgroup { echo $$ >> /tmp/cgroupv2/foo/cgroup.procs } +function config_bpffs { + if mount | grep $BPFFS > /dev/null; then + echo "bpffs already mounted" + else + echo "bpffs not mounted. Mounting..." + mount -t bpf none $BPFFS + fi +} function attach_bpf { - test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 + ./test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 [ $? -ne 0 ] && exit 1 } function cleanup { - if [ -d /tmp/cgroupv2/foo ]; then - test_cgrp2_sock -d /tmp/cgroupv2/foo - fi + rm -rf $LINK_PIN ip link del veth0b ip netns delete at_ns0 umount /tmp/cgroupv2 @@ -42,6 +51,7 @@ cleanup 2>/dev/null set -e config_device config_cgroup +config_bpffs set +e # @@ -62,6 +72,9 @@ if [ $? -eq 0 ]; then exit 1 fi +rm -rf $LINK_PIN +sleep 1 # Wait for link detach + # # Test 2 - fail ping # From 4fe6641526dbee115d9d037e94a344f4f448aaa4 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:06 +0000 Subject: [PATCH 3/7] samples: bpf: Refactor task_fd_query program with libbpf This commit refactors the existing kprobe program with libbpf bpf loader. To attach bpf program, this uses generic bpf_program__attach() approach rather than using bpf_load's load_bpf_file(). To attach bpf to perf_event, instead of using previous ioctl method, this commit uses bpf_program__attach_perf_event since it manages the enable of perf_event and attach of BPF programs to it, which is much more intuitive way to achieve. Also, explicit close(fd) has been removed since event will be closed inside bpf_link__destroy() automatically. Furthermore, to prevent conflict of same named uprobe events, O_TRUNC flag has been used to clear 'uprobe_events' interface. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-4-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/task_fd_query_user.c | 101 ++++++++++++++++++++++--------- 2 files changed, 75 insertions(+), 28 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index d31e082c369e..3bffd42e1482 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -107,7 +107,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := xdpsock_user.o xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o -task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) +task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := hbm.o $(CGROUP_HELPERS) diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index b68bd2f8fdc9..f6b772faa348 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -15,12 +15,15 @@ #include #include +#include #include -#include "bpf_load.h" #include "bpf_util.h" #include "perf-sys.h" #include "trace_helpers.h" +static struct bpf_program *progs[2]; +static struct bpf_link *links[2]; + #define CHECK_PERROR_RET(condition) ({ \ int __ret = !!(condition); \ if (__ret) { \ @@ -86,21 +89,22 @@ static int bpf_get_retprobe_bit(const char *event_type) return ret; } -static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, +static int test_debug_fs_kprobe(int link_idx, const char *fn_name, __u32 expected_fd_type) { __u64 probe_offset, probe_addr; __u32 len, prog_id, fd_type; + int err, event_fd; char buf[256]; - int err; len = sizeof(buf); - err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len, + event_fd = bpf_link__fd(links[link_idx]); + err = bpf_task_fd_query(getpid(), event_fd, 0, buf, &len, &prog_id, &fd_type, &probe_offset, &probe_addr); if (err < 0) { printf("FAIL: %s, for event_fd idx %d, fn_name %s\n", - __func__, prog_fd_idx, fn_name); + __func__, link_idx, fn_name); perror(" :"); return -1; } @@ -108,7 +112,7 @@ static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, fd_type != expected_fd_type || probe_offset != 0x0 || probe_addr != 0x0) { printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n", - prog_fd_idx); + link_idx); printf("buf: %s, fd_type: %u, probe_offset: 0x%llx," " probe_addr: 0x%llx\n", buf, fd_type, probe_offset, probe_addr); @@ -125,12 +129,13 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type, int is_return_bit = bpf_get_retprobe_bit(event_type); int type = bpf_find_probe_type(event_type); struct perf_event_attr attr = {}; - int fd; + struct bpf_link *link; + int fd, err = -1; if (type < 0 || is_return_bit < 0) { printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n", __func__, type, is_return_bit); - return -1; + return err; } attr.sample_period = 1; @@ -149,14 +154,21 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type, attr.type = type; fd = sys_perf_event_open(&attr, -1, 0, -1, 0); - CHECK_PERROR_RET(fd < 0); + link = bpf_program__attach_perf_event(progs[0], fd); + if (libbpf_get_error(link)) { + printf("ERROR: bpf_program__attach_perf_event failed\n"); + link = NULL; + close(fd); + goto cleanup; + } - CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0); - CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len, prog_id, fd_type, probe_offset, probe_addr) < 0); + err = 0; - return 0; +cleanup: + bpf_link__destroy(link); + return err; } static int test_nondebug_fs_probe(const char *event_type, const char *name, @@ -215,17 +227,18 @@ static int test_nondebug_fs_probe(const char *event_type, const char *name, static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) { + char buf[256], event_alias[sizeof("test_1234567890")]; const char *event_type = "uprobe"; struct perf_event_attr attr = {}; - char buf[256], event_alias[sizeof("test_1234567890")]; __u64 probe_offset, probe_addr; __u32 len, prog_id, fd_type; - int err, res, kfd, efd; + int err = -1, res, kfd, efd; + struct bpf_link *link; ssize_t bytes; snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type); - kfd = open(buf, O_WRONLY | O_APPEND, 0); + kfd = open(buf, O_WRONLY | O_TRUNC, 0); CHECK_PERROR_RET(kfd < 0); res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid()); @@ -254,10 +267,15 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) attr.type = PERF_TYPE_TRACEPOINT; attr.sample_period = 1; attr.wakeup_events = 1; + kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); - CHECK_PERROR_RET(kfd < 0); - CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); - CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0); + link = bpf_program__attach_perf_event(progs[0], kfd); + if (libbpf_get_error(link)) { + printf("ERROR: bpf_program__attach_perf_event failed\n"); + link = NULL; + close(kfd); + goto cleanup; + } len = sizeof(buf); err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len, @@ -283,9 +301,11 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) probe_offset); return -1; } + err = 0; - close(kfd); - return 0; +cleanup: + bpf_link__destroy(link); + return err; } int main(int argc, char **argv) @@ -294,21 +314,42 @@ int main(int argc, char **argv) extern char __executable_start; char filename[256], buf[256]; __u64 uprobe_file_offset; + struct bpf_program *prog; + struct bpf_object *obj; + int i = 0, err = -1; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); if (setrlimit(RLIMIT_MEMLOCK, &r)) { perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; + return err; } if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); - return 1; + return err; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return err; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + progs[i] = prog; + links[i] = bpf_program__attach(progs[i]); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[i] = NULL; + goto cleanup; + } + i++; } /* test two functions in the corresponding *_kern.c file */ @@ -378,6 +419,12 @@ int main(int argc, char **argv) false)); CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, true)); + err = 0; - return 0; +cleanup: + for (i--; i >= 0; i--) + bpf_link__destroy(links[i]); + + bpf_object__close(obj); + return err; } From 763af200d6160b2f79f45cbf9a85b8dc6e20f2c7 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:07 +0000 Subject: [PATCH 4/7] samples: bpf: Refactor ibumad program with libbpf This commit refactors the existing ibumad program with libbpf bpf loader. Attach/detach of Tracepoint bpf programs has been managed with the generic bpf_program__attach() and bpf_link__destroy() from the libbpf. Also, instead of using the previous BPF MAP definition, this commit refactors ibumad MAP definition with the new BTF-defined MAP format. To verify that this bpf program works without an infiniband device, try loading ib_umad kernel module and test the program as follows: # modprobe ib_umad # ./ibumad Moreover, TRACE_HELPERS has been removed from the Makefile since it is not used on this program. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-5-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/ibumad_kern.c | 24 ++++++------- samples/bpf/ibumad_user.c | 71 +++++++++++++++++++++++++++++---------- 3 files changed, 67 insertions(+), 30 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3bffd42e1482..09a249477554 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -109,7 +109,7 @@ xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) -ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) +ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c index 3a91b4c1989a..26dcd4dde946 100644 --- a/samples/bpf/ibumad_kern.c +++ b/samples/bpf/ibumad_kern.c @@ -16,19 +16,19 @@ #include -struct bpf_map_def SEC("maps") read_count = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), /* class; u32 required */ - .value_size = sizeof(u64), /* count of mads read */ - .max_entries = 256, /* Room for all Classes */ -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); /* class; u32 required */ + __type(value, u64); /* count of mads read */ + __uint(max_entries, 256); /* Room for all Classes */ +} read_count SEC(".maps"); -struct bpf_map_def SEC("maps") write_count = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), /* class; u32 required */ - .value_size = sizeof(u64), /* count of mads written */ - .max_entries = 256, /* Room for all Classes */ -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); /* class; u32 required */ + __type(value, u64); /* count of mads written */ + __uint(max_entries, 256); /* Room for all Classes */ +} write_count SEC(".maps"); #undef DEBUG #ifndef DEBUG diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c index fa06eef31a84..d83d8102f489 100644 --- a/samples/bpf/ibumad_user.c +++ b/samples/bpf/ibumad_user.c @@ -23,10 +23,15 @@ #include #include -#include "bpf_load.h" +#include #include "bpf_util.h" #include +static struct bpf_link *tp_links[3]; +static struct bpf_object *obj; +static int map_fd[2]; +static int tp_cnt; + static void dump_counts(int fd) { __u32 key; @@ -53,6 +58,11 @@ static void dump_all_counts(void) static void dump_exit(int sig) { dump_all_counts(); + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + + bpf_object__close(obj); exit(0); } @@ -73,19 +83,11 @@ static void usage(char *cmd) int main(int argc, char **argv) { + struct bpf_program *prog; unsigned long delay = 5; + char filename[256]; int longindex = 0; - int opt; - char bpf_file[256]; - - /* Create the eBPF kernel code path name. - * This follows the pattern of all of the other bpf samples - */ - snprintf(bpf_file, sizeof(bpf_file), "%s_kern.o", argv[0]); - - /* Do one final dump when exiting */ - signal(SIGINT, dump_exit); - signal(SIGTERM, dump_exit); + int opt, err = -1; while ((opt = getopt_long(argc, argv, "hd:rSw", long_options, &longindex)) != -1) { @@ -107,16 +109,51 @@ int main(int argc, char **argv) } } - if (load_bpf_file(bpf_file)) { - fprintf(stderr, "ERROR: failed to load eBPF from file : %s\n", - bpf_file); - return 1; + /* Do one final dump when exiting */ + signal(SIGINT, dump_exit); + signal(SIGTERM, dump_exit); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return err; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "read_count"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "write_count"); + if (map_fd[0] < 0 || map_fd[1] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + tp_links[tp_cnt] = bpf_program__attach(prog); + if (libbpf_get_error(tp_links[tp_cnt])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + tp_links[tp_cnt] = NULL; + goto cleanup; + } + tp_cnt++; } while (1) { sleep(delay); dump_all_counts(); } + err = 0; - return 0; +cleanup: + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + + bpf_object__close(obj); + return err; } From c6497df0ddc3363c2c940d92c3af0b2620477003 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:08 +0000 Subject: [PATCH 5/7] samples: bpf: Refactor test_overhead program with libbpf This commit refactors the existing program with libbpf bpf loader. Since the kprobe, tracepoint and raw_tracepoint bpf program can be attached with single bpf_program__attach() interface, so the corresponding function of libbpf is used here. Rather than specifying the number of cpus inside the code, this commit uses the number of available cpus with _SC_NPROCESSORS_ONLN. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-6-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/test_overhead_user.c | 82 +++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 09a249477554..25380e04897e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -78,7 +78,7 @@ lathist-objs := lathist_user.o offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) spintest-objs := spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := map_perf_test_user.o -test_overhead-objs := bpf_load.o test_overhead_user.o +test_overhead-objs := test_overhead_user.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_attach-objs := test_cgrp2_attach.o test_cgrp2_sock-objs := test_cgrp2_sock.o diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index 94f74112a20e..819a6fe86f89 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -18,10 +18,14 @@ #include #include #include -#include "bpf_load.h" +#include #define MAX_CNT 1000000 +static struct bpf_link *links[2]; +static struct bpf_object *obj; +static int cnt; + static __u64 time_get_ns(void) { struct timespec ts; @@ -115,20 +119,54 @@ static void run_perf_test(int tasks, int flags) } } +static int load_progs(char *filename) +{ + struct bpf_program *prog; + int err = 0; + + obj = bpf_object__open_file(filename, NULL); + err = libbpf_get_error(obj); + if (err < 0) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return err; + } + + /* load BPF program */ + err = bpf_object__load(obj); + if (err < 0) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + return err; + } + + bpf_object__for_each_program(prog, obj) { + links[cnt] = bpf_program__attach(prog); + err = libbpf_get_error(links[cnt]); + if (err < 0) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[cnt] = NULL; + return err; + } + cnt++; + } + + return err; +} + static void unload_progs(void) { - close(prog_fd[0]); - close(prog_fd[1]); - close(event_fd[0]); - close(event_fd[1]); + while (cnt) + bpf_link__destroy(links[--cnt]); + + bpf_object__close(obj); } int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; - char filename[256]; - int num_cpu = 8; + int num_cpu = sysconf(_SC_NPROCESSORS_ONLN); int test_flags = ~0; + char filename[256]; + int err = 0; setrlimit(RLIMIT_MEMLOCK, &r); @@ -145,38 +183,36 @@ int main(int argc, char **argv) if (test_flags & 0xC) { snprintf(filename, sizeof(filename), "%s_kprobe_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } + printf("w/KPROBE\n"); - run_perf_test(num_cpu, test_flags >> 2); + err = load_progs(filename); + if (!err) + run_perf_test(num_cpu, test_flags >> 2); + unload_progs(); } if (test_flags & 0x30) { snprintf(filename, sizeof(filename), "%s_tp_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } printf("w/TRACEPOINT\n"); - run_perf_test(num_cpu, test_flags >> 4); + err = load_progs(filename); + if (!err) + run_perf_test(num_cpu, test_flags >> 4); + unload_progs(); } if (test_flags & 0xC0) { snprintf(filename, sizeof(filename), "%s_raw_tp_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } printf("w/RAW_TRACEPOINT\n"); - run_perf_test(num_cpu, test_flags >> 6); + err = load_progs(filename); + if (!err) + run_perf_test(num_cpu, test_flags >> 6); + unload_progs(); } - return 0; + return err; } From 0afe0a998c40085a6342e1aeb4c510cccba46caf Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:09 +0000 Subject: [PATCH 6/7] samples: bpf: Fix lwt_len_hist reusing previous BPF map Currently, lwt_len_hist's map lwt_len_hist_map is uses pinning, and the map isn't cleared on test end. This leds to reuse of that map for each test, which prevents the results of the test from being accurate. This commit fixes the problem by removing of pinned map from bpffs. Also, this commit add the executable permission to shell script files. Fixes: f74599f7c5309 ("bpf: Add tests and samples for LWT-BPF") Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-7-danieltimlee@gmail.com --- samples/bpf/lwt_len_hist.sh | 2 ++ samples/bpf/test_lwt_bpf.sh | 0 2 files changed, 2 insertions(+) mode change 100644 => 100755 samples/bpf/lwt_len_hist.sh mode change 100644 => 100755 samples/bpf/test_lwt_bpf.sh diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh old mode 100644 new mode 100755 index 090b96eaf7f7..0eda9754f50b --- a/samples/bpf/lwt_len_hist.sh +++ b/samples/bpf/lwt_len_hist.sh @@ -8,6 +8,8 @@ VETH1=tst_lwt1b TRACE_ROOT=/sys/kernel/debug/tracing function cleanup { + # To reset saved histogram, remove pinned map + rm /sys/fs/bpf/tc/globals/lwt_len_hist_map ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null ip link del $VETH0 2> /dev/null ip link del $VETH1 2> /dev/null diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh old mode 100644 new mode 100755 From ceb5dea5654354fb4e6e393c99f1d0bf4debab0e Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:10 +0000 Subject: [PATCH 7/7] samples: bpf: Remove bpf_load loader completely Numerous refactoring that rewrites BPF programs written with bpf_load to use the libbpf loader was finally completed, resulting in BPF programs using bpf_load within the kernel being completely no longer present. This commit removes bpf_load, an outdated bpf loader that is difficult to keep up with the latest kernel BPF and causes confusion. Also, this commit removes the unused trace_helper and bpf_load from samples/bpf target objects from Makefile. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Acked-by: Jesper Dangaard Brouer Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-8-danieltimlee@gmail.com --- samples/bpf/Makefile | 10 +- samples/bpf/bpf_load.c | 667 -------------------------------- samples/bpf/bpf_load.h | 57 --- samples/bpf/xdp2skb_meta_kern.c | 2 +- 4 files changed, 5 insertions(+), 731 deletions(-) delete mode 100644 samples/bpf/bpf_load.c delete mode 100644 samples/bpf/bpf_load.h diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 25380e04897e..05db041f8b18 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -73,7 +73,7 @@ tracex5-objs := tracex5_user.o $(TRACE_HELPERS) tracex6-objs := tracex6_user.o tracex7-objs := tracex7_user.o test_probe_write_user-objs := test_probe_write_user_user.o -trace_output-objs := trace_output_user.o $(TRACE_HELPERS) +trace_output-objs := trace_output_user.o lathist-objs := lathist_user.o offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) spintest-objs := spintest_user.o $(TRACE_HELPERS) @@ -91,8 +91,8 @@ test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := trace_event_user.o $(TRACE_HELPERS) sampleip-objs := sampleip_user.o $(TRACE_HELPERS) -tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o -lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o +tc_l2_redirect-objs := tc_l2_redirect_user.o +lwt_len_hist-objs := lwt_len_hist_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o @@ -108,7 +108,7 @@ xdpsock-objs := xdpsock_user.o xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) -xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) +xdp_sample_pkts-objs := xdp_sample_pkts_user.o ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) @@ -197,8 +197,6 @@ TPROGS_CFLAGS += --sysroot=$(SYSROOT) TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib endif -TPROGCFLAGS_bpf_load.o += -Wno-unused-variable - TPROGS_LDLIBS += $(LIBBPF) -lelf -lz TPROGLDLIBS_tracex4 += -lrt TPROGLDLIBS_trace_output += -lrt diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c deleted file mode 100644 index c5ad528f046e..000000000000 --- a/samples/bpf/bpf_load.c +++ /dev/null @@ -1,667 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "bpf_load.h" -#include "perf-sys.h" - -#define DEBUGFS "/sys/kernel/debug/tracing/" - -static char license[128]; -static int kern_version; -static bool processed_sec[128]; -char bpf_log_buf[BPF_LOG_BUF_SIZE]; -int map_fd[MAX_MAPS]; -int prog_fd[MAX_PROGS]; -int event_fd[MAX_PROGS]; -int prog_cnt; -int prog_array_fd = -1; - -struct bpf_map_data map_data[MAX_MAPS]; -int map_data_count; - -static int populate_prog_array(const char *event, int prog_fd) -{ - int ind = atoi(event), err; - - err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); - if (err < 0) { - printf("failed to store prog_fd in prog_array\n"); - return -1; - } - return 0; -} - -static int write_kprobe_events(const char *val) -{ - int fd, ret, flags; - - if (val == NULL) - return -1; - else if (val[0] == '\0') - flags = O_WRONLY | O_TRUNC; - else - flags = O_WRONLY | O_APPEND; - - fd = open(DEBUGFS "kprobe_events", flags); - - ret = write(fd, val, strlen(val)); - close(fd); - - return ret; -} - -static int load_and_attach(const char *event, struct bpf_insn *prog, int size) -{ - bool is_socket = strncmp(event, "socket", 6) == 0; - bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; - bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; - bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; - bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0; - bool is_xdp = strncmp(event, "xdp", 3) == 0; - bool is_perf_event = strncmp(event, "perf_event", 10) == 0; - bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; - bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; - bool is_sockops = strncmp(event, "sockops", 7) == 0; - bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; - bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0; - size_t insns_cnt = size / sizeof(struct bpf_insn); - enum bpf_prog_type prog_type; - char buf[256]; - int fd, efd, err, id; - struct perf_event_attr attr = {}; - - attr.type = PERF_TYPE_TRACEPOINT; - attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = 1; - attr.wakeup_events = 1; - - if (is_socket) { - prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - } else if (is_kprobe || is_kretprobe) { - prog_type = BPF_PROG_TYPE_KPROBE; - } else if (is_tracepoint) { - prog_type = BPF_PROG_TYPE_TRACEPOINT; - } else if (is_raw_tracepoint) { - prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT; - } else if (is_xdp) { - prog_type = BPF_PROG_TYPE_XDP; - } else if (is_perf_event) { - prog_type = BPF_PROG_TYPE_PERF_EVENT; - } else if (is_cgroup_skb) { - prog_type = BPF_PROG_TYPE_CGROUP_SKB; - } else if (is_cgroup_sk) { - prog_type = BPF_PROG_TYPE_CGROUP_SOCK; - } else if (is_sockops) { - prog_type = BPF_PROG_TYPE_SOCK_OPS; - } else if (is_sk_skb) { - prog_type = BPF_PROG_TYPE_SK_SKB; - } else if (is_sk_msg) { - prog_type = BPF_PROG_TYPE_SK_MSG; - } else { - printf("Unknown event '%s'\n", event); - return -1; - } - - if (prog_cnt == MAX_PROGS) - return -1; - - fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version, - bpf_log_buf, BPF_LOG_BUF_SIZE); - if (fd < 0) { - printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); - return -1; - } - - prog_fd[prog_cnt++] = fd; - - if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) - return 0; - - if (is_socket || is_sockops || is_sk_skb || is_sk_msg) { - if (is_socket) - event += 6; - else - event += 7; - if (*event != '/') - return 0; - event++; - if (!isdigit(*event)) { - printf("invalid prog number\n"); - return -1; - } - return populate_prog_array(event, fd); - } - - if (is_raw_tracepoint) { - efd = bpf_raw_tracepoint_open(event + 15, fd); - if (efd < 0) { - printf("tracepoint %s %s\n", event + 15, strerror(errno)); - return -1; - } - event_fd[prog_cnt - 1] = efd; - return 0; - } - - if (is_kprobe || is_kretprobe) { - bool need_normal_check = true; - const char *event_prefix = ""; - - if (is_kprobe) - event += 7; - else - event += 10; - - if (*event == 0) { - printf("event name cannot be empty\n"); - return -1; - } - - if (isdigit(*event)) - return populate_prog_array(event, fd); - -#ifdef __x86_64__ - if (strncmp(event, "sys_", 4) == 0) { - snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s", - is_kprobe ? 'p' : 'r', event, event); - err = write_kprobe_events(buf); - if (err >= 0) { - need_normal_check = false; - event_prefix = "__x64_"; - } - } -#endif - if (need_normal_check) { - snprintf(buf, sizeof(buf), "%c:%s %s", - is_kprobe ? 'p' : 'r', event, event); - err = write_kprobe_events(buf); - if (err < 0) { - printf("failed to create kprobe '%s' error '%s'\n", - event, strerror(errno)); - return -1; - } - } - - strcpy(buf, DEBUGFS); - strcat(buf, "events/kprobes/"); - strcat(buf, event_prefix); - strcat(buf, event); - strcat(buf, "/id"); - } else if (is_tracepoint) { - event += 11; - - if (*event == 0) { - printf("event name cannot be empty\n"); - return -1; - } - strcpy(buf, DEBUGFS); - strcat(buf, "events/"); - strcat(buf, event); - strcat(buf, "/id"); - } - - efd = open(buf, O_RDONLY, 0); - if (efd < 0) { - printf("failed to open event %s\n", event); - return -1; - } - - err = read(efd, buf, sizeof(buf)); - if (err < 0 || err >= sizeof(buf)) { - printf("read from '%s' failed '%s'\n", event, strerror(errno)); - return -1; - } - - close(efd); - - buf[err] = 0; - id = atoi(buf); - attr.config = id; - - efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); - if (efd < 0) { - printf("event %d fd %d err %s\n", id, efd, strerror(errno)); - return -1; - } - event_fd[prog_cnt - 1] = efd; - err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); - if (err < 0) { - printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n", - strerror(errno)); - return -1; - } - err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); - if (err < 0) { - printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n", - strerror(errno)); - return -1; - } - - return 0; -} - -static int load_maps(struct bpf_map_data *maps, int nr_maps, - fixup_map_cb fixup_map) -{ - int i, numa_node; - - for (i = 0; i < nr_maps; i++) { - if (fixup_map) { - fixup_map(&maps[i], i); - /* Allow userspace to assign map FD prior to creation */ - if (maps[i].fd != -1) { - map_fd[i] = maps[i].fd; - continue; - } - } - - numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ? - maps[i].def.numa_node : -1; - - if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || - maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { - int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; - - map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, - maps[i].name, - maps[i].def.key_size, - inner_map_fd, - maps[i].def.max_entries, - maps[i].def.map_flags, - numa_node); - } else { - map_fd[i] = bpf_create_map_node(maps[i].def.type, - maps[i].name, - maps[i].def.key_size, - maps[i].def.value_size, - maps[i].def.max_entries, - maps[i].def.map_flags, - numa_node); - } - if (map_fd[i] < 0) { - printf("failed to create map %d (%s): %d %s\n", - i, maps[i].name, errno, strerror(errno)); - return 1; - } - maps[i].fd = map_fd[i]; - - if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY) - prog_array_fd = map_fd[i]; - } - return 0; -} - -static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, - GElf_Shdr *shdr, Elf_Data **data) -{ - Elf_Scn *scn; - - scn = elf_getscn(elf, i); - if (!scn) - return 1; - - if (gelf_getshdr(scn, shdr) != shdr) - return 2; - - *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); - if (!*shname || !shdr->sh_size) - return 3; - - *data = elf_getdata(scn, 0); - if (!*data || elf_getdata(scn, *data) != NULL) - return 4; - - return 0; -} - -static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, - GElf_Shdr *shdr, struct bpf_insn *insn, - struct bpf_map_data *maps, int nr_maps) -{ - int i, nrels; - - nrels = shdr->sh_size / shdr->sh_entsize; - - for (i = 0; i < nrels; i++) { - GElf_Sym sym; - GElf_Rel rel; - unsigned int insn_idx; - bool match = false; - int j, map_idx; - - gelf_getrel(data, i, &rel); - - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - - gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); - - if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { - printf("invalid relo for insn[%d].code 0x%x\n", - insn_idx, insn[insn_idx].code); - return 1; - } - insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - - /* Match FD relocation against recorded map_data[] offset */ - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - if (maps[map_idx].elf_offset == sym.st_value) { - match = true; - break; - } - } - if (match) { - insn[insn_idx].imm = maps[map_idx].fd; - } else { - printf("invalid relo for insn[%d] no map_data match\n", - insn_idx); - return 1; - } - } - - return 0; -} - -static int cmp_symbols(const void *l, const void *r) -{ - const GElf_Sym *lsym = (const GElf_Sym *)l; - const GElf_Sym *rsym = (const GElf_Sym *)r; - - if (lsym->st_value < rsym->st_value) - return -1; - else if (lsym->st_value > rsym->st_value) - return 1; - else - return 0; -} - -static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, - Elf *elf, Elf_Data *symbols, int strtabidx) -{ - int map_sz_elf, map_sz_copy; - bool validate_zero = false; - Elf_Data *data_maps; - int i, nr_maps; - GElf_Sym *sym; - Elf_Scn *scn; - int copy_sz; - - if (maps_shndx < 0) - return -EINVAL; - if (!symbols) - return -EINVAL; - - /* Get data for maps section via elf index */ - scn = elf_getscn(elf, maps_shndx); - if (scn) - data_maps = elf_getdata(scn, NULL); - if (!scn || !data_maps) { - printf("Failed to get Elf_Data from maps section %d\n", - maps_shndx); - return -EINVAL; - } - - /* For each map get corrosponding symbol table entry */ - sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym)); - for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { - assert(nr_maps < MAX_MAPS+1); - if (!gelf_getsym(symbols, i, &sym[nr_maps])) - continue; - if (sym[nr_maps].st_shndx != maps_shndx) - continue; - /* Only increment iif maps section */ - nr_maps++; - } - - /* Align to map_fd[] order, via sort on offset in sym.st_value */ - qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols); - - /* Keeping compatible with ELF maps section changes - * ------------------------------------------------ - * The program size of struct bpf_load_map_def is known by loader - * code, but struct stored in ELF file can be different. - * - * Unfortunately sym[i].st_size is zero. To calculate the - * struct size stored in the ELF file, assume all struct have - * the same size, and simply divide with number of map - * symbols. - */ - map_sz_elf = data_maps->d_size / nr_maps; - map_sz_copy = sizeof(struct bpf_load_map_def); - if (map_sz_elf < map_sz_copy) { - /* - * Backward compat, loading older ELF file with - * smaller struct, keeping remaining bytes zero. - */ - map_sz_copy = map_sz_elf; - } else if (map_sz_elf > map_sz_copy) { - /* - * Forward compat, loading newer ELF file with larger - * struct with unknown features. Assume zero means - * feature not used. Thus, validate rest of struct - * data is zero. - */ - validate_zero = true; - } - - /* Memcpy relevant part of ELF maps data to loader maps */ - for (i = 0; i < nr_maps; i++) { - struct bpf_load_map_def *def; - unsigned char *addr, *end; - const char *map_name; - size_t offset; - - map_name = elf_strptr(elf, strtabidx, sym[i].st_name); - maps[i].name = strdup(map_name); - if (!maps[i].name) { - printf("strdup(%s): %s(%d)\n", map_name, - strerror(errno), errno); - free(sym); - return -errno; - } - - /* Symbol value is offset into ELF maps section data area */ - offset = sym[i].st_value; - def = (struct bpf_load_map_def *)(data_maps->d_buf + offset); - maps[i].elf_offset = offset; - memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def)); - memcpy(&maps[i].def, def, map_sz_copy); - - /* Verify no newer features were requested */ - if (validate_zero) { - addr = (unsigned char *) def + map_sz_copy; - end = (unsigned char *) def + map_sz_elf; - for (; addr < end; addr++) { - if (*addr != 0) { - free(sym); - return -EFBIG; - } - } - } - } - - free(sym); - return nr_maps; -} - -static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) -{ - int fd, i, ret, maps_shndx = -1, strtabidx = -1; - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr, shdr_prog; - Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; - char *shname, *shname_prog; - int nr_maps = 0; - - /* reset global variables */ - kern_version = 0; - memset(license, 0, sizeof(license)); - memset(processed_sec, 0, sizeof(processed_sec)); - - if (elf_version(EV_CURRENT) == EV_NONE) - return 1; - - fd = open(path, O_RDONLY, 0); - if (fd < 0) - return 1; - - elf = elf_begin(fd, ELF_C_READ, NULL); - - if (!elf) - return 1; - - if (gelf_getehdr(elf, &ehdr) != &ehdr) - return 1; - - /* clear all kprobes */ - i = write_kprobe_events(""); - - /* scan over all elf sections to get license and map info */ - for (i = 1; i < ehdr.e_shnum; i++) { - - if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) - continue; - - if (0) /* helpful for llvm debugging */ - printf("section %d:%s data %p size %zd link %d flags %d\n", - i, shname, data->d_buf, data->d_size, - shdr.sh_link, (int) shdr.sh_flags); - - if (strcmp(shname, "license") == 0) { - processed_sec[i] = true; - memcpy(license, data->d_buf, data->d_size); - } else if (strcmp(shname, "version") == 0) { - processed_sec[i] = true; - if (data->d_size != sizeof(int)) { - printf("invalid size of version section %zd\n", - data->d_size); - return 1; - } - memcpy(&kern_version, data->d_buf, sizeof(int)); - } else if (strcmp(shname, "maps") == 0) { - int j; - - maps_shndx = i; - data_maps = data; - for (j = 0; j < MAX_MAPS; j++) - map_data[j].fd = -1; - } else if (shdr.sh_type == SHT_SYMTAB) { - strtabidx = shdr.sh_link; - symbols = data; - } - } - - ret = 1; - - if (!symbols) { - printf("missing SHT_SYMTAB section\n"); - goto done; - } - - if (data_maps) { - nr_maps = load_elf_maps_section(map_data, maps_shndx, - elf, symbols, strtabidx); - if (nr_maps < 0) { - printf("Error: Failed loading ELF maps (errno:%d):%s\n", - nr_maps, strerror(-nr_maps)); - goto done; - } - if (load_maps(map_data, nr_maps, fixup_map)) - goto done; - map_data_count = nr_maps; - - processed_sec[maps_shndx] = true; - } - - /* process all relo sections, and rewrite bpf insns for maps */ - for (i = 1; i < ehdr.e_shnum; i++) { - if (processed_sec[i]) - continue; - - if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) - continue; - - if (shdr.sh_type == SHT_REL) { - struct bpf_insn *insns; - - /* locate prog sec that need map fixup (relocations) */ - if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, - &shdr_prog, &data_prog)) - continue; - - if (shdr_prog.sh_type != SHT_PROGBITS || - !(shdr_prog.sh_flags & SHF_EXECINSTR)) - continue; - - insns = (struct bpf_insn *) data_prog->d_buf; - processed_sec[i] = true; /* relo section */ - - if (parse_relo_and_apply(data, symbols, &shdr, insns, - map_data, nr_maps)) - continue; - } - } - - /* load programs */ - for (i = 1; i < ehdr.e_shnum; i++) { - - if (processed_sec[i]) - continue; - - if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) - continue; - - if (memcmp(shname, "kprobe/", 7) == 0 || - memcmp(shname, "kretprobe/", 10) == 0 || - memcmp(shname, "tracepoint/", 11) == 0 || - memcmp(shname, "raw_tracepoint/", 15) == 0 || - memcmp(shname, "xdp", 3) == 0 || - memcmp(shname, "perf_event", 10) == 0 || - memcmp(shname, "socket", 6) == 0 || - memcmp(shname, "cgroup/", 7) == 0 || - memcmp(shname, "sockops", 7) == 0 || - memcmp(shname, "sk_skb", 6) == 0 || - memcmp(shname, "sk_msg", 6) == 0) { - ret = load_and_attach(shname, data->d_buf, - data->d_size); - if (ret != 0) - goto done; - } - } - -done: - close(fd); - return ret; -} - -int load_bpf_file(char *path) -{ - return do_load_bpf_file(path, NULL); -} - -int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) -{ - return do_load_bpf_file(path, fixup_map); -} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h deleted file mode 100644 index 4fcd258c616f..000000000000 --- a/samples/bpf/bpf_load.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BPF_LOAD_H -#define __BPF_LOAD_H - -#include - -#define MAX_MAPS 32 -#define MAX_PROGS 32 - -struct bpf_load_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - -struct bpf_map_data { - int fd; - char *name; - size_t elf_offset; - struct bpf_load_map_def def; -}; - -typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx); - -extern int prog_fd[MAX_PROGS]; -extern int event_fd[MAX_PROGS]; -extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; -extern int prog_cnt; - -/* There is a one-to-one mapping between map_fd[] and map_data[]. - * The map_data[] just contains more rich info on the given map. - */ -extern int map_fd[MAX_MAPS]; -extern struct bpf_map_data map_data[MAX_MAPS]; -extern int map_data_count; - -/* parses elf file compiled by llvm .c->.o - * . parses 'maps' section and creates maps via BPF syscall - * . parses 'license' section and passes it to syscall - * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by - * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD - * . loads eBPF programs via BPF syscall - * - * One ELF file can contain multiple BPF programs which will be loaded - * and their FDs stored stored in prog_fd array - * - * returns zero on success - */ -int load_bpf_file(char *path); -int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); - -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); -#endif diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c index 9b783316e860..d5631014a176 100644 --- a/samples/bpf/xdp2skb_meta_kern.c +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -6,7 +6,7 @@ * This uses the XDP data_meta infrastructure, and is a cooperation * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook. * - * Notice: This example does not use the BPF C-loader (bpf_load.c), + * Notice: This example does not use the BPF C-loader, * but instead rely on the iproute2 TC tool for loading BPF-objects. */ #include