perf report: Add machine parallelism

Add calculation of the current parallelism level (the number of threads actively
running on CPUs). The parallelism level can be shown in reports on its own, and
can also be used to calculate latency overheads.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/0f8c1b8eb12619029e31b3d5c0346f4616a5aeda.1739437531.git.dvyukov@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
This commit is contained in:
Dmitry Vyukov 2025-02-13 10:08:14 +01:00 committed by Namhyung Kim
parent 20600b8aab
commit f13bc61b2e
6 changed files with 20 additions and 0 deletions

View File

@ -1568,6 +1568,7 @@ int cmd_report(int argc, const char **argv)
report.tool.cgroup = perf_event__process_cgroup;
report.tool.exit = perf_event__process_exit;
report.tool.fork = perf_event__process_fork;
report.tool.context_switch = perf_event__process_switch;
report.tool.lost = perf_event__process_lost;
report.tool.read = process_read_event;
report.tool.attr = process_attr;

View File

@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al)
al->cpumode = 0;
al->cpu = 0;
al->socket = 0;
al->parallelism = 1;
}
/*

View File

@ -21,6 +21,8 @@ struct addr_location {
u8 cpumode;
s32 cpu;
s32 socket;
/* Same as machine.parallelism but within [1, nr_cpus]. */
int parallelism;
};
void addr_location__init(struct addr_location *al);

View File

@ -767,6 +767,9 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
al->socket = env->cpu[al->cpu].socket_id;
}
/* Account for possible out-of-order switch events. */
al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
if (al->map) {
if (symbol_conf.dso_list &&
(!dso || !(strlist__has_entry(symbol_conf.dso_list,

View File

@ -94,6 +94,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
machine->vmlinux_map = NULL;
/* There is no initial context switch in, so we start at 1. */
machine->parallelism = 1;
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
@ -677,8 +679,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus
int machine__process_switch_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
if (dump_trace)
perf_event__fprintf_switch(event, stdout);
machine->parallelism += out ? -1 : 1;
return 0;
}
@ -1880,6 +1885,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_task(event, stdout);
/* There is no context switch out before exit, so we decrement here. */
machine->parallelism--;
if (thread != NULL) {
if (symbol_conf.keep_exited_threads)
thread__set_exited(thread, /*exited=*/true);

View File

@ -50,6 +50,12 @@ struct machine {
u64 text_start;
u64 text_end;
} sched, lock, traceiter, trace;
/*
* The current parallelism level (number of threads that run on CPUs).
* This value can be less than 1, or larger than the total number
* of CPUs, if events are poorly ordered.
*/
int parallelism;
pid_t *current_tid;
size_t current_tid_sz;
union { /* Tool specific area */