perf report: Add parallelism filter

Add a parallelism filter that can be used to look at specific parallelism
levels only. The format is the same as for CPU lists. For example:

Only single-threaded samples: --parallelism=1
Low parallelism only: --parallelism=1-4
High parallelism only: --parallelism=64-128

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/e61348985ff0a6a14b07c39e880edbd60a8f8635.1739437531.git.dvyukov@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
This commit is contained in:
Dmitry Vyukov 2025-02-13 10:08:17 +01:00 committed by Namhyung Kim
parent 216f8a970c
commit 61b6b31c2f
7 changed files with 91 additions and 2 deletions

View File

@ -1390,6 +1390,8 @@ int cmd_report(int argc, const char **argv)
symbol__config_symfs),
OPT_STRING('C', "cpu", &report.cpu_list, "cpu",
"list of cpus to profile"),
OPT_STRING(0, "parallelism", &symbol_conf.parallelism_list_str, "parallelism",
"only consider these parallelism levels (cpu set format)"),
OPT_BOOLEAN('I', "show-info", &report.show_full_info,
"Display extended information about perf.data file"),
OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src,
@ -1721,7 +1723,8 @@ int cmd_report(int argc, const char **argv)
}
if (report.disable_order || !perf_session__has_switch_events(session)) {
if ((sort_order && strstr(sort_order, "parallelism")) ||
if (symbol_conf.parallelism_list_str ||
(sort_order && strstr(sort_order, "parallelism")) ||
(field_order && strstr(field_order, "parallelism"))) {
if (report.disable_order)
ui__error("Use of parallelism is incompatible with --disable-order.\n");

View File

@ -769,6 +769,8 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
/* Account for possible out-of-order switch events. */
al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
if (test_bit(al->parallelism, symbol_conf.parallelism_filter))
al->filtered |= (1 << HIST_FILTER__PARALLELISM);
if (al->map) {
if (symbol_conf.dso_list &&

View File

@ -43,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
struct hist_entry *he);
static bool hists__filter_entry_by_socket(struct hists *hists,
struct hist_entry *he);
static bool hists__filter_entry_by_parallelism(struct hists *hists,
struct hist_entry *he);
u16 hists__col_len(struct hists *hists, enum hist_column col)
{
@ -1457,6 +1459,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
if (symbol_conf.sym_list == NULL)
return;
break;
case HIST_FILTER__PARALLELISM:
if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0)
return;
break;
case HIST_FILTER__PARENT:
case HIST_FILTER__GUEST:
case HIST_FILTER__HOST:
@ -1515,6 +1521,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
perf_hpp__is_sym_entry);
hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM,
perf_hpp__is_parallelism_entry);
hists__apply_filters(he->hists, he);
}
@ -1711,6 +1720,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
hists__filter_entry_by_thread(hists, he);
hists__filter_entry_by_symbol(hists, he);
hists__filter_entry_by_socket(hists, he);
hists__filter_entry_by_parallelism(hists, he);
}
int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
@ -2197,6 +2207,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists,
return false;
}
/*
 * Apply the parallelism filter to a single histogram entry.
 *
 * If the bit indexed by he->parallelism is set in hists->parallelism_filter,
 * the HIST_FILTER__PARALLELISM bit is OR-ed into he->filtered.
 *
 * Returns true when the entry was marked as filtered, false otherwise.
 */
static bool hists__filter_entry_by_parallelism(struct hists *hists,
					       struct hist_entry *he)
{
	/* Bit clear: this parallelism level is not filtered out. */
	if (!test_bit(he->parallelism, hists->parallelism_filter))
		return false;

	he->filtered |= (1 << HIST_FILTER__PARALLELISM);
	return true;
}
typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
@ -2366,6 +2386,16 @@ void hists__filter_by_socket(struct hists *hists)
hists__filter_entry_by_socket);
}
/*
 * Re-apply the parallelism filter to every entry of @hists.
 *
 * Flat output goes through hists__filter_by_type() with the per-entry
 * callback; hierarchy output (symbol_conf.report_hierarchy) goes through
 * hists__filter_hierarchy() with the filter bitmap as its argument.
 */
void hists__filter_by_parallelism(struct hists *hists)
{
	if (!symbol_conf.report_hierarchy) {
		hists__filter_by_type(hists, HIST_FILTER__PARALLELISM,
				      hists__filter_entry_by_parallelism);
		return;
	}

	hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM,
				hists->parallelism_filter);
}
void events_stats__inc(struct events_stats *stats, u32 type)
{
++stats->nr_events[0];
@ -2872,6 +2902,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
hists->entries = RB_ROOT_CACHED;
mutex_init(&hists->lock);
hists->socket_filter = -1;
hists->parallelism_filter = symbol_conf.parallelism_filter;
hists->hpp_list = hpp_list;
INIT_LIST_HEAD(&hists->hpp_formats);
return 0;

View File

@ -31,6 +31,7 @@ enum hist_filter {
HIST_FILTER__HOST,
HIST_FILTER__SOCKET,
HIST_FILTER__C2C,
HIST_FILTER__PARALLELISM,
};
typedef u16 filter_mask_t;
@ -112,6 +113,7 @@ struct hists {
const struct dso *dso_filter;
const char *uid_filter_str;
const char *symbol_filter_str;
unsigned long *parallelism_filter;
struct mutex lock;
struct hists_stats stats;
u64 event_stream;
@ -388,11 +390,13 @@ void hists__filter_by_dso(struct hists *hists);
void hists__filter_by_thread(struct hists *hists);
void hists__filter_by_symbol(struct hists *hists);
void hists__filter_by_socket(struct hists *hists);
void hists__filter_by_parallelism(struct hists *hists);
/*
 * Report whether any filter is configured on @hists: a thread filter, a DSO
 * filter, a symbol filter string, a socket filter (any value above -1) or a
 * parallelism filter.
 *
 * NOTE(review): parallelism_filter is a pointer, so its term is true whenever
 * the pointer is non-NULL, regardless of the bitmap contents. If it is always
 * pointed at a bitmap during init, this function never returns false; confirm
 * whether an "any bit set" test was intended instead.
 */
static inline bool hists__has_filter(struct hists *hists)
{
	return hists->thread_filter || hists->dso_filter ||
		hists->symbol_filter_str || (hists->socket_filter > -1) ||
		hists->parallelism_filter;
}
u16 hists__col_len(struct hists *hists, enum hist_column col);

View File

@ -900,6 +900,16 @@ sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right)
return right->parallelism - left->parallelism;
}
/*
 * Filter callback for the parallelism sort key.
 *
 * @he:   entry whose parallelism level is looked up.
 * @type: filter being applied; only HIST_FILTER__PARALLELISM is handled.
 * @arg:  bitmap indexed by parallelism level (presumably the same bitmap
 *        that hists->parallelism_filter points to; verify against caller).
 *
 * Returns -1 when @type is some other filter, otherwise the value of the
 * bit for he->parallelism in the bitmap.
 */
static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg)
{
	const unsigned long *level_bits = arg;

	return type == HIST_FILTER__PARALLELISM ?
		test_bit(he->parallelism, level_bits) : -1;
}
static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
@ -909,6 +919,7 @@ static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
/*
 * Sort-key descriptor for the "Parallelism" column: wires up the compare,
 * filter and formatting callbacks together with the column-width slot.
 */
struct sort_entry sort_parallelism = {
	/* Presentation. */
	.se_header	= "Parallelism",
	.se_width_idx	= HISTC_PARALLELISM,
	.se_snprintf	= hist_entry__parallelism_snprintf,

	/* Ordering and filtering. */
	.se_cmp		= sort__parallelism_cmp,
	.se_filter	= hist_entry__parallelism_filter,
};

View File

@ -18,6 +18,7 @@
#include "annotate.h"
#include "build-id.h"
#include "cap.h"
#include "cpumap.h"
#include "dso.h"
#include "util.h" // lsdir()
#include "debug.h"
@ -2471,6 +2472,36 @@ int symbol__annotation_init(void)
return 0;
}
static int setup_parallelism_bitmap(void)
{
struct perf_cpu_map *map;
struct perf_cpu cpu;
int i, err = -1;
if (symbol_conf.parallelism_list_str == NULL)
return 0;
map = perf_cpu_map__new(symbol_conf.parallelism_list_str);
if (map == NULL) {
pr_err("failed to parse parallelism filter list\n");
return -1;
}
bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1);
perf_cpu_map__for_each_cpu(cpu, i, map) {
if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) {
pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu);
goto out_delete_map;
}
__clear_bit(cpu.cpu, symbol_conf.parallelism_filter);
}
err = 0;
out_delete_map:
perf_cpu_map__put(map);
return err;
}
int symbol__init(struct perf_env *env)
{
const char *symfs;
@ -2490,6 +2521,9 @@ int symbol__init(struct perf_env *env)
return -1;
}
if (setup_parallelism_bitmap())
return -1;
if (setup_list(&symbol_conf.dso_list,
symbol_conf.dso_list_str, "dso") < 0)
return -1;

View File

@ -3,6 +3,8 @@
#define __PERF_SYMBOL_CONF 1
#include <stdbool.h>
#include <linux/bitmap.h>
#include "perf.h"
struct strlist;
struct intlist;
@ -62,6 +64,7 @@ struct symbol_conf {
*pid_list_str,
*tid_list_str,
*sym_list_str,
*parallelism_list_str,
*col_width_list_str,
*bt_stop_list_str;
const char *addr2line_path;
@ -82,6 +85,7 @@ struct symbol_conf {
int pad_output_len_dso;
int group_sort_idx;
int addr_range;
DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1);
};
extern struct symbol_conf symbol_conf;