diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3c3188a57016..886727ae8529 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -96,6 +96,10 @@ #include #include +#ifdef HAVE_BPF_SKEL +#include "util/bpf_skel/bperf_cgroup.h" +#endif + #define DEFAULT_SEPARATOR " " #define FREEZE_ON_SMI_PATH "bus/event_source/devices/cpu/freeze_on_smi" @@ -2852,7 +2856,14 @@ int cmd_stat(int argc, const char **argv) goto out; } } - +#ifdef HAVE_BPF_SKEL + if (target.use_bpf && nr_cgroups && + (evsel_list->core.nr_entries / nr_cgroups) > BPERF_CGROUP__MAX_EVENTS) { + pr_warning("Disabling BPF counters due to more events (%d) than the max (%d)\n", + evsel_list->core.nr_entries / nr_cgroups, BPERF_CGROUP__MAX_EVENTS); + target.use_bpf = false; + } +#endif // HAVE_BPF_SKEL evlist__warn_user_requested_cpus(evsel_list, target.cpu_list); evlist__for_each_entry(evsel_list, counter) { diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 690be3ce3e11..68bd994c8880 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -27,6 +27,7 @@ #include "cpumap.h" #include "thread_map.h" +#include "bpf_skel/bperf_cgroup.h" #include "bpf_skel/bperf_cgroup.skel.h" static struct perf_event_attr cgrp_switch_attr = { @@ -42,6 +43,55 @@ static struct bperf_cgroup_bpf *skel; #define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0)) +static void setup_rodata(struct bperf_cgroup_bpf *sk, int evlist_size) +{ + int map_size, total_cpus = cpu__max_cpu().cpu; + + sk->rodata->num_cpus = total_cpus; + sk->rodata->num_events = evlist_size / nr_cgroups; + + if (cgroup_is_v2("perf_event") > 0) + sk->rodata->use_cgroup_v2 = 1; + + BUG_ON(evlist_size % nr_cgroups != 0); + + /* we need one copy of events per cpu for reading */ + map_size = total_cpus * evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.events, map_size); + bpf_map__set_max_entries(sk->maps.cgrp_idx, nr_cgroups); + /* previous result is saved in a per-cpu array */ + map_size = evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.prev_readings, map_size); + /* cgroup result needs all events (per-cpu) */ + map_size = evlist_size; + bpf_map__set_max_entries(sk->maps.cgrp_readings, map_size); +} + +static void test_max_events_program_load(void) +{ +#ifndef NDEBUG + /* + * Test that the program verifies with the maximum number of events. If + * this test fails unfortunately perf needs recompiling with a lower + * BPERF_CGROUP__MAX_EVENTS to avoid BPF verifier issues. + */ + int err, max_events = BPERF_CGROUP__MAX_EVENTS * nr_cgroups; + struct bperf_cgroup_bpf *test_skel = bperf_cgroup_bpf__open(); + + if (!test_skel) { + pr_err("Failed to open cgroup skeleton\n"); + return; + } + setup_rodata(test_skel, max_events); + err = bperf_cgroup_bpf__load(test_skel); + if (err) { + pr_err("Failed to load cgroup skeleton with max events %d.\n", + BPERF_CGROUP__MAX_EVENTS); + } + bperf_cgroup_bpf__destroy(test_skel); +#endif +} + static int bperf_load_program(struct evlist *evlist) { struct bpf_link *link; @@ -50,35 +100,18 @@ static int bperf_load_program(struct evlist *evlist) int i, j; struct perf_cpu cpu; int total_cpus = cpu__max_cpu().cpu; - int map_size, map_fd; - int prog_fd, err; + int map_fd, prog_fd, err; + + set_max_rlimit(); + + test_max_events_program_load(); skel = bperf_cgroup_bpf__open(); if (!skel) { pr_err("Failed to open cgroup skeleton\n"); return -1; } - - skel->rodata->num_cpus = total_cpus; - skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups; - - if (cgroup_is_v2("perf_event") > 0) - skel->rodata->use_cgroup_v2 = 1; - - BUG_ON(evlist->core.nr_entries % nr_cgroups != 0); - - /* we need one copy of events per cpu for reading */ - map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.events, map_size); - bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); - /* previous result is saved in a per-cpu array */ - map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.prev_readings, map_size); - /* cgroup result needs all events (per-cpu) */ - map_size = evlist->core.nr_entries; - bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); - - set_max_rlimit(); + setup_rodata(skel, evlist->core.nr_entries); err = bperf_cgroup_bpf__load(skel); if (err) { diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index 18ab4d9b49ff..c2298a2decc9 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook // Copyright (c) 2021 Google +#include "bperf_cgroup.h" #include "vmlinux.h" #include #include #include -#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary -#define MAX_EVENTS 1024 // max events per cgroup: arbitrary - // NOTE: many of map and global data will be modified before loading // from the userspace (perf tool) using the skeleton helpers. @@ -97,7 +95,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup); level = BPF_CORE_READ(cgrp, level); - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id; if (i > level) @@ -123,7 +121,7 @@ static inline int get_cgroup_v2_idx(__u32 *cgrps, int size) __u32 *elem; int cnt; - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i); if (cgrp_id == 0) @@ -148,17 +146,17 @@ static int bperf_cgroup_count(void) register int c = 0; struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val; __u32 cpu = bpf_get_smp_processor_id(); - __u32 cgrp_idx[MAX_LEVELS]; + __u32 cgrp_idx[BPERF_CGROUP__MAX_LEVELS]; int cgrp_cnt; __u32 key, cgrp; long err; if (use_cgroup_v2) - cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); else - cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); - for ( ; idx < MAX_EVENTS; idx++) { + for ( ; idx < BPERF_CGROUP__MAX_EVENTS; idx++) { if (idx == num_events) break; @@ -186,7 +184,7 @@ static int bperf_cgroup_count(void) delta.enabled = val.enabled - prev_val->enabled; delta.running = val.running - prev_val->running; - for (c = 0; c < MAX_LEVELS; c++) { + for (c = 0; c < BPERF_CGROUP__MAX_LEVELS; c++) { if (c == cgrp_cnt) break; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.h b/tools/perf/util/bpf_skel/bperf_cgroup.h new file mode 100644 index 000000000000..3fb84b19d39a --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_cgroup.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Data structures shared between BPF and tools. */ +#ifndef __BPERF_CGROUP_H +#define __BPERF_CGROUP_H + +// These constants impact code size of bperf_cgroup.bpf.c that may result in BPF +// verifier issues. They are exposed to control the size and also to disable BPF +// counters when the number of user events is too large. + +// max cgroup hierarchy level: arbitrary +#define BPERF_CGROUP__MAX_LEVELS 10 +// max events per cgroup: arbitrary +#define BPERF_CGROUP__MAX_EVENTS 128 + +#endif /* __BPERF_CGROUP_H */