perf sched stats: Add record and rawdump support

Define new perf-tool-only sample types and their layouts. Add logic to
parse /proc/schedstat, convert it to the perf sample format, and save the
samples to a perf.data file with the `perf sched stats record` command.

Also add logic to read the perf.data file, interpret the schedstat samples,
and print a raw dump of them with `perf script -D`.

Note that the /proc/schedstat output format is identified by a version
number. This patch supports v15, but older or newer versions can be added
easily.

Co-developed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: James Clark <james.clark@linaro.org>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anubhav Shelat <ashelat@redhat.com>
Cc: Ben Gainey <ben.gainey@arm.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Chun-Tse Shao <ctshao@google.com>
Cc: David Vernet <void@manifault.com>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Dr. David Alan Gilbert <linux@treblig.org>
Cc: Gautham Shenoy <gautham.shenoy@amd.com>
Cc: Graham Woodward <graham.woodward@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Shrikanth Hegde <sshegde@linux.ibm.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Yang Jihong <yangjihong@bytedance.com>
Cc: Yujie Liu <yujie.liu@intel.com>
Cc: Zhongqiu Han <quic_zhonhan@quicinc.com>
[ PRIu64 needs uint64_t, not 'unsigned long' to work on both 32-bit and 64-bit ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Swapnil Sapkal 2026-01-19 17:58:25 +00:00 committed by Arnaldo Carvalho de Melo
parent d40c68a49f
commit c3030995f2
13 changed files with 682 additions and 2 deletions

View File

@ -211,6 +211,8 @@ SYNOPSIS
struct perf_record_header_feature;
struct perf_record_compressed;
struct perf_record_compressed2;
struct perf_record_schedstat_cpu;
struct perf_record_schedstat_domain;
--
DESCRIPTION

View File

@ -179,6 +179,7 @@ install_lib: libs
cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
HDRS += schedstat-v15.h
INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf

View File

@ -496,6 +496,43 @@ struct perf_record_bpf_metadata {
struct perf_record_bpf_metadata_entry entries[];
};
/*
 * Per-cpu counters of a version 15 /proc/schedstat record.
 *
 * The members are not spelled out here: they are generated from the
 * CPU_FIELD() entries of schedstat-v15.h. Each entry expands to
 * "_type _name" and the ';' that follows the entry in that header
 * terminates the member declaration. Only CPU_FIELD is defined at this
 * point, so the DOMAIN_FIELD section of the header is skipped.
 */
struct perf_record_schedstat_cpu_v15 {
#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name
#include "schedstat-v15.h"
#undef CPU_FIELD
};
/*
 * Payload of a PERF_RECORD_SCHEDSTAT_CPU user event: one cpu's scheduler
 * counters together with the information needed to interpret them.
 */
struct perf_record_schedstat_cpu {
struct perf_event_header header;
/* Timestamp value associated with the /proc/schedstat snapshot. */
__u64 timestamp;
/* Number of the cpu the counters belong to. */
__u32 cpu;
/* /proc/schedstat format version; selects the valid union member below. */
__u16 version;
/* Padding: rounds cpu + version (6 bytes) up to 8 bytes. */
char __pad[2];
/* Version specific counters; only v15 is defined so far. */
union {
struct perf_record_schedstat_cpu_v15 v15;
};
};
/*
 * Per-domain counters of a version 15 /proc/schedstat record.
 *
 * The members are generated from the DOMAIN_FIELD() entries of
 * schedstat-v15.h: each entry expands to "_type _name" and the ';' that
 * follows the entry in that header terminates the member declaration.
 * Only DOMAIN_FIELD is defined at this point, so the CPU_FIELD section and
 * the optional DOMAIN_CATEGORY / DERIVED_* entries of the header are
 * skipped.
 */
struct perf_record_schedstat_domain_v15 {
#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name
#include "schedstat-v15.h"
#undef DOMAIN_FIELD
};
/*
 * NOTE(review): DOMAIN_NAME_LEN is not referenced by any code visible in
 * this change - presumably reserved for a domain name member; confirm.
 */
#define DOMAIN_NAME_LEN 16
/*
 * Payload of a PERF_RECORD_SCHEDSTAT_DOMAIN user event: the counters of one
 * scheduling domain of one cpu.
 */
struct perf_record_schedstat_domain {
struct perf_event_header header;
/* Timestamp value associated with the /proc/schedstat snapshot. */
__u64 timestamp;
/* Number of the cpu this domain record belongs to. */
__u32 cpu;
/* /proc/schedstat format version; selects the valid union member below. */
__u16 version;
/* Domain number, i.e. the <N> of the "domain<N>" record. */
__u16 domain;
/* Version specific counters; only v15 is defined so far. */
union {
struct perf_record_schedstat_domain_v15 v15;
};
};
enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_USER_TYPE_START = 64,
PERF_RECORD_HEADER_ATTR = 64,
@ -519,6 +556,8 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_FINISHED_INIT = 82,
PERF_RECORD_COMPRESSED2 = 83,
PERF_RECORD_BPF_METADATA = 84,
PERF_RECORD_SCHEDSTAT_CPU = 85,
PERF_RECORD_SCHEDSTAT_DOMAIN = 86,
PERF_RECORD_HEADER_MAX
};
@ -562,6 +601,8 @@ union perf_event {
struct perf_record_compressed pack;
struct perf_record_compressed2 pack2;
struct perf_record_bpf_metadata bpf_metadata;
struct perf_record_schedstat_cpu schedstat_cpu;
struct perf_record_schedstat_domain schedstat_domain;
};
#endif /* __LIBPERF_EVENT_H */

View File

@ -0,0 +1,146 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * X-macro table of the per-cpu counters of a version 15 /proc/schedstat
 * "cpu<N>" line. The entries are listed in the order in which the counters
 * are read from that line, so the order must not be changed.
 *
 * Usage: define
 *   CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)
 * include this file, then #undef CPU_FIELD. Every entry is followed by a
 * ';', so an expansion may be a declaration or a statement. The section is
 * skipped when CPU_FIELD is not defined.
 *
 *   _type, _name  C type and name of the counter
 *   _desc         human readable description
 *   _format       printf() conversion for the value
 *   _ver          name of the version specific union member (v15)
 *   _is_pct, _pct_of  not used by any includer visible in this change;
 *                 presumably "show as a percentage of _pct_of" - confirm.
 *
 * NOTE(review): the kernel's sched-stats documentation appears to give
 * rq_cpu_time and run_delay in nanoseconds rather than jiffies - confirm
 * the unit stated in the two descriptions below.
 */
#ifdef CPU_FIELD
CPU_FIELD(__u32, yld_count, "sched_yield() count",
"%11u", false, yld_count, v15);
CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
"%11u", false, array_exp, v15);
CPU_FIELD(__u32, sched_count, "schedule() called",
"%11u", false, sched_count, v15);
CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
"%11u", true, sched_count, v15);
CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
"%11u", false, ttwu_count, v15);
CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
"%11u", true, ttwu_count, v15);
CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
"%11llu", false, rq_cpu_time, v15);
CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
"%11llu", true, rq_cpu_time, v15);
CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
"%11llu", false, pcount, v15);
#endif
/*
 * X-macro table of the per-domain counters of a version 15 /proc/schedstat
 * "domain<N>" line. The DOMAIN_FIELD() entries are listed in the order in
 * which the counters are read from that line, so the order must not be
 * changed.
 *
 * Usage: define
 *   DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)
 * include this file, then #undef DOMAIN_FIELD. Every entry is followed by a
 * ';', so an expansion may be a declaration or a statement. The section is
 * skipped when DOMAIN_FIELD is not defined.
 *
 * Three further hooks are optional and only expanded when the includer
 * defines them: DOMAIN_CATEGORY() (a heading string for the group of fields
 * that follows), DERIVED_CNT_FIELD() and DERIVED_AVG_FIELD() (values named
 * after, and parameterised by, the raw counters listed in their arguments).
 * No includer visible in this change defines any of the three.
 */
#ifdef DOMAIN_FIELD
#ifdef DOMAIN_CATEGORY
DOMAIN_CATEGORY(" <Category idle> ");
#endif
DOMAIN_FIELD(__u32, idle_lb_count,
"load_balance() count on cpu idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, idle_lb_balanced,
"load_balance() found balanced on cpu idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, idle_lb_failed,
"load_balance() move task failed on cpu idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, idle_lb_imbalance,
"imbalance sum on cpu idle", "%11u", false, v15);
DOMAIN_FIELD(__u32, idle_lb_gained,
"pull_task() count on cpu idle", "%11u", false, v15);
DOMAIN_FIELD(__u32, idle_lb_hot_gained,
"pull_task() when target task was cache-hot on cpu idle", "%11u", false, v15);
DOMAIN_FIELD(__u32, idle_lb_nobusyq,
"load_balance() failed to find busier queue on cpu idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, idle_lb_nobusyg,
"load_balance() failed to find busier group on cpu idle", "%11u", true, v15);
#ifdef DERIVED_CNT_FIELD
DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u",
idle_lb_count, idle_lb_balanced, idle_lb_failed, v15);
#endif
#ifdef DERIVED_AVG_FIELD
DERIVED_AVG_FIELD(idle_lb_avg_pulled,
"avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v15);
#endif
#ifdef DOMAIN_CATEGORY
DOMAIN_CATEGORY(" <Category busy> ");
#endif
DOMAIN_FIELD(__u32, busy_lb_count,
"load_balance() count on cpu busy", "%11u", true, v15);
DOMAIN_FIELD(__u32, busy_lb_balanced,
"load_balance() found balanced on cpu busy", "%11u", true, v15);
DOMAIN_FIELD(__u32, busy_lb_failed,
"load_balance() move task failed on cpu busy", "%11u", true, v15);
DOMAIN_FIELD(__u32, busy_lb_imbalance,
"imbalance sum on cpu busy", "%11u", false, v15);
DOMAIN_FIELD(__u32, busy_lb_gained,
"pull_task() count on cpu busy", "%11u", false, v15);
DOMAIN_FIELD(__u32, busy_lb_hot_gained,
"pull_task() when target task was cache-hot on cpu busy", "%11u", false, v15);
DOMAIN_FIELD(__u32, busy_lb_nobusyq,
"load_balance() failed to find busier queue on cpu busy", "%11u", true, v15);
DOMAIN_FIELD(__u32, busy_lb_nobusyg,
"load_balance() failed to find busier group on cpu busy", "%11u", true, v15);
#ifdef DERIVED_CNT_FIELD
DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u",
busy_lb_count, busy_lb_balanced, busy_lb_failed, v15);
#endif
#ifdef DERIVED_AVG_FIELD
DERIVED_AVG_FIELD(busy_lb_avg_pulled,
"avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v15);
#endif
#ifdef DOMAIN_CATEGORY
DOMAIN_CATEGORY(" <Category newidle> ");
#endif
DOMAIN_FIELD(__u32, newidle_lb_count,
"load_balance() count on cpu newly idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, newidle_lb_balanced,
"load_balance() found balanced on cpu newly idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, newidle_lb_failed,
"load_balance() move task failed on cpu newly idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, newidle_lb_imbalance,
"imbalance sum on cpu newly idle", "%11u", false, v15);
DOMAIN_FIELD(__u32, newidle_lb_gained,
"pull_task() count on cpu newly idle", "%11u", false, v15);
DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
"pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v15);
DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
"load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v15);
DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
"load_balance() failed to find busier group on cpu newly idle", "%11u", true, v15);
#ifdef DERIVED_CNT_FIELD
DERIVED_CNT_FIELD(newidle_lb_success_count,
"load_balance() success count on cpu newly idle", "%11u",
newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v15);
#endif
#ifdef DERIVED_AVG_FIELD
DERIVED_AVG_FIELD(newidle_lb_avg_pulled,
"avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v15);
#endif
#ifdef DOMAIN_CATEGORY
DOMAIN_CATEGORY(" <Category active_load_balance()> ");
#endif
DOMAIN_FIELD(__u32, alb_count,
"active_load_balance() count", "%11u", false, v15);
DOMAIN_FIELD(__u32, alb_failed,
"active_load_balance() move task failed", "%11u", false, v15);
DOMAIN_FIELD(__u32, alb_pushed,
"active_load_balance() successfully moved a task", "%11u", false, v15);
#ifdef DOMAIN_CATEGORY
DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
#endif
DOMAIN_FIELD(__u32, sbe_count,
"sbe_count is not used", "%11u", false, v15);
DOMAIN_FIELD(__u32, sbe_balanced,
"sbe_balanced is not used", "%11u", false, v15);
DOMAIN_FIELD(__u32, sbe_pushed,
"sbe_pushed is not used", "%11u", false, v15);
#ifdef DOMAIN_CATEGORY
DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
#endif
DOMAIN_FIELD(__u32, sbf_count,
"sbf_count is not used", "%11u", false, v15);
DOMAIN_FIELD(__u32, sbf_balanced,
"sbf_balanced is not used", "%11u", false, v15);
DOMAIN_FIELD(__u32, sbf_pushed,
"sbf_pushed is not used", "%11u", false, v15);
#ifdef DOMAIN_CATEGORY
DOMAIN_CATEGORY(" <Wakeup Info> ");
#endif
DOMAIN_FIELD(__u32, ttwu_wake_remote,
"try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v15);
DOMAIN_FIELD(__u32, ttwu_move_affine,
"try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v15);
DOMAIN_FIELD(__u32, ttwu_move_balance,
"try_to_wake_up() started passive balancing", "%11u", false, v15);
#endif /* DOMAIN_FIELD */

View File

@ -2657,6 +2657,8 @@ int cmd_inject(int argc, const char **argv)
inject.tool.compressed = perf_event__repipe_op4_synth;
inject.tool.auxtrace = perf_event__repipe_auxtrace;
inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
inject.tool.schedstat_cpu = perf_event__repipe_op2_synth;
inject.tool.schedstat_domain = perf_event__repipe_op2_synth;
inject.tool.dont_split_sample_group = true;
inject.tool.merge_deferred_callchains = false;
inject.session = __perf_session__new(&data, &inject.tool,

View File

@ -28,6 +28,8 @@
#include "util/debug.h"
#include "util/event.h"
#include "util/util.h"
#include "util/synthetic-events.h"
#include "util/target.h"
#include <linux/kernel.h>
#include <linux/log2.h>
@ -55,6 +57,7 @@
#define MAX_PRIO 140
static const char *cpu_list;
static struct perf_cpu_map *user_requested_cpus;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct sched_atom;
@ -236,6 +239,9 @@ struct perf_sched {
volatile bool thread_funcs_exit;
const char *prio_str;
DECLARE_BITMAP(prio_bitmap, MAX_PRIO);
struct perf_session *session;
struct perf_data *data;
};
/* per thread run time data */
@ -3734,6 +3740,195 @@ static void setup_sorting(struct perf_sched *sched, const struct option *options
sort_dimension__add("pid", &sched->cmp_pid);
}
/*
 * perf_event__handler_t callback used while synthesizing schedstat events:
 * append @event to the output file of the perf_sched instance that embeds
 * @tool and account its size in the session header.
 *
 * Returns 0 on success, -1 if nothing could be written.
 */
static int process_synthesized_schedstat_event(const struct perf_tool *tool,
					       union perf_event *event,
					       struct perf_sample *sample __maybe_unused,
					       struct machine *machine __maybe_unused)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	const size_t nr_bytes = event->header.size;

	if (perf_data__write(sched->data, event, nr_bytes) > 0) {
		/* Keep the header's notion of the data section size in sync. */
		sched->session->header.data_size += nr_bytes;
		return 0;
	}

	pr_err("failed to write perf data, error: %m\n");
	return -1;
}
/*
 * Deliberately empty signal handler. perf_sched__schedstat_record() installs
 * it for SIGINT, SIGCHLD and SIGTERM so that these signals are caught, which
 * makes its pause() call return, rather than being handled by their default
 * action.
 */
static void sighandler(int sig __maybe_unused)
{
}
/*
 * Make sure the kernel collects schedstats by writing '1' to
 * <procfs>/sys/kernel/sched_schedstats if it currently reads '0'.
 *
 * @reset: set to 1 only when this function switched schedstats on, so the
 *         caller knows it has to switch them off again; left untouched
 *         otherwise.
 *
 * Returns 0 on success, -1 if the file cannot be opened or updated.
 */
static int enable_sched_schedstats(int *reset)
{
	char path[PATH_MAX];
	FILE *fp;
	int ret = 0;
	int ch;		/* int, not char: getc() may return EOF */

	snprintf(path, PATH_MAX, "%s/sys/kernel/sched_schedstats", procfs__mountpoint());
	fp = fopen(path, "w+");
	if (!fp) {
		pr_err("Failed to open %s\n", path);
		return -1;
	}

	ch = getc(fp);
	if (ch == '0') {
		rewind(fp);
		/* Flush explicitly so that a rejected write is noticed here. */
		if (putc('1', fp) == EOF || fflush(fp) == EOF) {
			pr_err("Failed to enable schedstats via %s\n", path);
			ret = -1;
		} else {
			*reset = 1;
		}
	}

	/* Close on every path, not only when the value had to be changed. */
	fclose(fp);
	return ret;
}
/*
 * Switch schedstats collection off by writing '0' to
 * <procfs>/sys/kernel/sched_schedstats.
 *
 * Returns 0 once the file has been opened and written, -1 if it cannot be
 * opened.
 */
static int disable_sched_schedstat(void)
{
	char sysctl_path[PATH_MAX];
	FILE *knob;

	snprintf(sysctl_path, sizeof(sysctl_path), "%s/sys/kernel/sched_schedstats",
		 procfs__mountpoint());

	knob = fopen(sysctl_path, "w");
	if (knob) {
		putc('0', knob);
		fclose(knob);
		return 0;
	}

	pr_err("Failed to open %s\n", sysctl_path);
	return -1;
}
/*
 * Output file name used only by the `perf sched stats` subcommand. It is set
 * from the -o/--output option and handed to perf_data as the path to write;
 * it stays NULL when the option is not given.
 */
const char *output_name;
/*
 * Implement `perf sched stats record`.
 *
 * Writes a perf.data style file that contains two /proc/schedstat snapshots:
 * one taken before and one taken after the measurement window. The window
 * ends when a signal arrives (SIGINT, SIGTERM, or SIGCHLD from the optional
 * workload given in @argv). Schedstats are switched on for the duration of
 * the window if they were off, and switched off again afterwards.
 *
 * @sched: tool state; ->session and ->data are valid only while this
 *         function runs and are cleared before it returns.
 * @argc, @argv: optional workload command line (argc == 0: no workload).
 *
 * Returns 0 on success, a negative value on failure.
 */
static int perf_sched__schedstat_record(struct perf_sched *sched,
					int argc, const char **argv)
{
	struct perf_session *session;
	struct target target = {};
	struct evlist *evlist;
	int reset = 0;
	int err = 0;
	int fd;
	struct perf_data data = {
		.path  = output_name,
		.mode  = PERF_DATA_MODE_WRITE,
	};

	/* The handlers do nothing; they only make pause() below return. */
	signal(SIGINT, sighandler);
	signal(SIGCHLD, sighandler);
	signal(SIGTERM, sighandler);

	evlist = evlist__new();
	if (!evlist)
		return -ENOMEM;

	session = perf_session__new(&data, &sched->tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		evlist__delete(evlist);
		return PTR_ERR(session);
	}

	session->evlist = evlist;

	/* Consumed by process_synthesized_schedstat_event(). */
	sched->session = session;
	sched->data = &data;

	fd = perf_data__fd(&data);

	/*
	 * Capture all important metadata about the system. Although they are
	 * not used by `perf sched stats` tool directly, they provide useful
	 * information about profiled environment.
	 */
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_CPUID);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CACHE);
	perf_header__set_feat(&session->header, HEADER_MEM_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_HYBRID_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPU_DOMAIN_INFO);

	/* Provisional header; rewritten below once the data size is known. */
	err = perf_session__write_header(session, evlist, fd, false);
	if (err < 0)
		goto out;

	/*
	 * `perf sched stats` does not support workload profiling (-p pid)
	 * since /proc/schedstat file contains cpu specific data only. Hence, a
	 * profile target is either set of cpus or systemwide, never a process.
	 * Note that, although `-- <workload>` is supported, profile data are
	 * still cpu/systemwide.
	 */
	if (cpu_list)
		target.cpu_list = cpu_list;
	else
		target.system_wide = true;

	if (argc) {
		err = evlist__prepare_workload(evlist, &target, argv, false, NULL);
		if (err)
			goto out;
	}

	err = evlist__create_maps(evlist, &target);
	if (err < 0)
		goto out;

	user_requested_cpus = evlist->core.user_requested_cpus;

	/* First snapshot: counters at the start of the window. */
	err = perf_event__synthesize_schedstat(&(sched->tool),
					       process_synthesized_schedstat_event,
					       user_requested_cpus);
	if (err < 0)
		goto out;

	err = enable_sched_schedstats(&reset);
	if (err < 0)
		goto out;

	if (argc)
		evlist__start_workload(evlist);

	/* wait for signal */
	pause();

	/* Only undo what enable_sched_schedstats() changed. */
	if (reset) {
		err = disable_sched_schedstat();
		if (err < 0)
			goto out;
	}

	/* Second snapshot: counters at the end of the window. */
	err = perf_event__synthesize_schedstat(&(sched->tool),
					       process_synthesized_schedstat_event,
					       user_requested_cpus);
	if (err < 0)
		goto out;

	err = perf_session__write_header(session, evlist, fd, true);

out:
	if (!err)
		fprintf(stderr, "[ perf sched stats: Wrote samples to %s ]\n", data.path);
	else
		fprintf(stderr, "[ perf sched stats: Failed !! ]\n");

	/* 'data' lives on this stack frame; do not leave pointers to it behind. */
	sched->session = NULL;
	sched->data = NULL;

	/*
	 * Release the session instead of merely closing the fd: this frees
	 * the session and closes the output file it owns. For a write-mode
	 * session the evlist is not released by it and is deleted separately.
	 */
	perf_session__delete(session);
	evlist__delete(evlist);
	return err;
}
static bool schedstat_events_exposed(void)
{
/*
@ -3910,6 +4105,12 @@ int cmd_sched(int argc, const char **argv)
OPT_BOOLEAN('P', "pre-migrations", &sched.pre_migrations, "Show pre-migration wait time"),
OPT_PARENT(sched_options)
};
const struct option stats_options[] = {
OPT_STRING('o', "output", &output_name, "file",
"`stats record` with output filename"),
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
OPT_END()
};
const char * const latency_usage[] = {
"perf sched latency [<options>]",
@ -3927,9 +4128,13 @@ int cmd_sched(int argc, const char **argv)
"perf sched timehist [<options>]",
NULL
};
const char *stats_usage[] = {
"perf sched stats {record} [<options>]",
NULL
};
const char *const sched_subcommands[] = { "record", "latency", "map",
"replay", "script",
"timehist", NULL };
"timehist", "stats", NULL };
const char *sched_usage[] = {
NULL,
NULL
@ -4027,6 +4232,21 @@ int cmd_sched(int argc, const char **argv)
ret = symbol__validate_sym_arguments();
if (!ret)
ret = perf_sched__timehist(&sched);
} else if (!strcmp(argv[0], "stats")) {
const char *const stats_subcommands[] = {"record", NULL};
argc = parse_options_subcommand(argc, argv, stats_options,
stats_subcommands,
stats_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (argv[0] && !strcmp(argv[0], "record")) {
if (argc)
argc = parse_options(argc, argv, stats_options,
stats_usage, 0);
return perf_sched__schedstat_record(&sched, argc, argv);
}
usage_with_options(stats_usage, stats_options);
} else {
usage_with_options(sched_usage, sched_options);
}

View File

@ -83,6 +83,8 @@ static const char *perf_event__names[] = {
[PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT",
[PERF_RECORD_COMPRESSED2] = "COMPRESSED2",
[PERF_RECORD_BPF_METADATA] = "BPF_METADATA",
[PERF_RECORD_SCHEDSTAT_CPU] = "SCHEDSTAT_CPU",
[PERF_RECORD_SCHEDSTAT_DOMAIN] = "SCHEDSTAT_DOMAIN",
};
const char *perf_event__name(unsigned int id)
@ -571,6 +573,44 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma
return ret;
}
/*
 * Raw-dump a PERF_RECORD_SCHEDSTAT_CPU event to @fp as "cpu<N>" followed by
 * every counter of the record's schedstat version, space separated.
 *
 * Returns the number of characters printed for the record, or, for an
 * unsupported version, the length of the diagnostic printed instead.
 */
size_t perf_event__fprintf_schedstat_cpu(union perf_event *event, FILE *fp)
{
	struct perf_record_schedstat_cpu *rec = &event->schedstat_cpu;
	size_t printed;

	/* The prefix is printed even if the version turns out unsupported. */
	printed = fprintf(fp, "\ncpu%u ", rec->cpu);

	if (rec->version != 15) {
		return fprintf(fp, "Unsupported /proc/schedstat version %d.\n",
			       rec->version);
	}

	/* One fprintf() per CPU_FIELD() entry of the v15 table. */
#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)	\
	printed += fprintf(fp, "%" PRIu64 " ", (uint64_t)rec->_ver._name)
#include <perf/schedstat-v15.h>
#undef CPU_FIELD

	return printed;
}
/*
 * Raw-dump a PERF_RECORD_SCHEDSTAT_DOMAIN event to @fp as "domain<N>"
 * followed by every counter of the record's schedstat version, space
 * separated.
 *
 * Returns the number of characters printed for the record, or, for an
 * unsupported version, the length of the diagnostic printed instead.
 */
size_t perf_event__fprintf_schedstat_domain(union perf_event *event, FILE *fp)
{
	struct perf_record_schedstat_domain *rec = &event->schedstat_domain;
	size_t printed;

	/* The prefix is printed even if the version turns out unsupported. */
	printed = fprintf(fp, "\ndomain%u ", rec->domain);

	if (rec->version != 15) {
		return fprintf(fp, "Unsupported /proc/schedstat version %d.\n",
			       rec->version);
	}

	/* One fprintf() per DOMAIN_FIELD() entry of the v15 table. */
#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)	\
	printed += fprintf(fp, "%" PRIu64 " ", (uint64_t)rec->_ver._name)
#include <perf/schedstat-v15.h>
#undef DOMAIN_FIELD

	return printed;
}
size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",

View File

@ -392,6 +392,8 @@ size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp);
size_t perf_event__fprintf_schedstat_cpu(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_schedstat_domain(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
int kallsyms__get_function_start(const char *kallsyms_filename,

View File

@ -698,6 +698,20 @@ static void perf_event__time_conv_swap(union perf_event *event,
}
}
/*
 * Byte-swap hook registered in perf_event__swap_ops[] for
 * PERF_RECORD_SCHEDSTAT_CPU. Not implemented yet: the event is left
 * untouched and both arguments are ignored.
 */
static void
perf_event__schedstat_cpu_swap(union perf_event *event __maybe_unused,
bool sample_id_all __maybe_unused)
{
/* FIXME */
}
/*
 * Byte-swap hook registered in perf_event__swap_ops[] for
 * PERF_RECORD_SCHEDSTAT_DOMAIN. Not implemented yet: the event is left
 * untouched and both arguments are ignored.
 */
static void
perf_event__schedstat_domain_swap(union perf_event *event __maybe_unused,
bool sample_id_all __maybe_unused)
{
/* FIXME */
}
typedef void (*perf_event__swap_op)(union perf_event *event,
bool sample_id_all);
@ -737,6 +751,8 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap,
[PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap,
[PERF_RECORD_TIME_CONV] = perf_event__time_conv_swap,
[PERF_RECORD_SCHEDSTAT_CPU] = perf_event__schedstat_cpu_swap,
[PERF_RECORD_SCHEDSTAT_DOMAIN] = perf_event__schedstat_domain_swap,
[PERF_RECORD_HEADER_MAX] = NULL,
};
@ -1667,6 +1683,12 @@ static s64 perf_session__process_user_event(struct perf_session *session,
case PERF_RECORD_BPF_METADATA:
err = tool->bpf_metadata(tool, session, event);
break;
case PERF_RECORD_SCHEDSTAT_CPU:
err = tool->schedstat_cpu(tool, session, event);
break;
case PERF_RECORD_SCHEDSTAT_DOMAIN:
err = tool->schedstat_domain(tool, session, event);
break;
default:
err = -EINVAL;
break;

View File

@ -2529,3 +2529,182 @@ int parse_synth_opt(char *synth)
return ret;
}
/*
 * Build a PERF_RECORD_SCHEDSTAT_CPU event from the remainder of a
 * "cpu<N> ..." line of /proc/schedstat; the caller has already consumed the
 * leading 'c'.
 *
 * @io:        reader positioned right after that 'c'
 * @version:   schedstat format version; counters are parsed for v15 only
 * @cpu:       out: receives the cpu number read from the line
 * @timestamp: value stored in the event's timestamp member
 *
 * Returns a zalloc()ed event the caller must free(), or NULL on allocation
 * failure or malformed input.
 */
static union perf_event *__synthesize_schedstat_cpu(struct io *io, __u16 version,
						    __u64 *cpu, __u64 timestamp)
{
	const size_t ev_size = PERF_ALIGN(sizeof(struct perf_record_schedstat_cpu),
					  sizeof(u64));
	union perf_event *ev = zalloc(ev_size);
	struct perf_record_schedstat_cpu *rec;
	char term;

	if (ev == NULL)
		return NULL;

	rec = &ev->schedstat_cpu;
	rec->header.type = PERF_RECORD_SCHEDSTAT_CPU;
	rec->header.size = ev_size;
	rec->timestamp = timestamp;

	/* Rest of the "cpu" keyword, then the cpu number and a blank. */
	if (io__get_char(io) != 'p' || io__get_char(io) != 'u')
		goto err_free;

	if (io__get_dec(io, cpu) != ' ')
		goto err_free;

	/* Each counter must be terminated by a blank or the end of line. */
#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)	\
	do {								\
		__u64 _val;						\
									\
		term = io__get_dec(io, &_val);				\
		if (term != ' ' && term != '\n')			\
			goto err_free;					\
		rec->_ver._name = _val;					\
	} while (0)

	if (version == 15) {
#include <perf/schedstat-v15.h>
	}
#undef CPU_FIELD

	rec->cpu = *cpu;
	rec->version = version;
	return ev;

err_free:
	free(ev);
	return NULL;
}
/*
 * Build a PERF_RECORD_SCHEDSTAT_DOMAIN event from the remainder of a
 * "domain<N> <cpumask> ..." line of /proc/schedstat; the caller has already
 * consumed the leading 'd'.
 *
 * @io:        reader positioned right after that 'd'
 * @version:   schedstat format version; counters are parsed for v15 only
 * @cpu:       cpu the domain belongs to (taken from the preceding cpu line)
 * @timestamp: value stored in the event's timestamp member
 *
 * Returns a zalloc()ed event the caller must free(), or NULL on allocation
 * failure, malformed input or premature end of file.
 */
static union perf_event *__synthesize_schedstat_domain(struct io *io, __u16 version,
						       __u64 cpu, __u64 timestamp)
{
	struct perf_record_schedstat_domain *ds;
	union perf_event *event;
	__u64 d_num;
	size_t size;
	int ch;		/* int: the io helpers signal errors with negative values */

	if (io__get_char(io) != 'o' || io__get_char(io) != 'm' || io__get_char(io) != 'a' ||
	    io__get_char(io) != 'i' || io__get_char(io) != 'n')
		return NULL;

	/* d_num is only valid when the number was followed by a blank. */
	if (io__get_dec(io, &d_num) != ' ')
		return NULL;

	/*
	 * Skip cpumask as it can be extracted from perf header. Stop on a
	 * read error or EOF rather than spinning forever on a truncated line.
	 */
	do {
		ch = io__get_char(io);
		if (ch < 0)
			return NULL;
	} while (ch != ' ');

	size = sizeof(*ds);
	size = PERF_ALIGN(size, sizeof(u64));
	event = zalloc(size);
	if (!event)
		return NULL;

	ds = &event->schedstat_domain;
	ds->header.type = PERF_RECORD_SCHEDSTAT_DOMAIN;
	ds->header.size = size;
	ds->version = version;
	ds->timestamp = timestamp;
	ds->domain = d_num;
	ds->cpu = cpu;

	/* Each counter must be terminated by a blank or the end of line. */
#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)	\
	do {								\
		__u64 _tmp;						\
									\
		ch = io__get_dec(io, &_tmp);				\
		if (ch != ' ' && ch != '\n')				\
			goto out_domain;				\
		ds->_ver._name = _tmp;					\
	} while (0)

	if (version == 15) {
#include <perf/schedstat-v15.h>
	}
#undef DOMAIN_FIELD

	return event;

out_domain:
	free(event);
	return NULL;
}
/*
 * Take one snapshot of <procfs>/schedstat and hand it to @process as a
 * sequence of PERF_RECORD_SCHEDSTAT_CPU / PERF_RECORD_SCHEDSTAT_DOMAIN
 * events, one per "cpu" or "domain" line.
 *
 * @tool:                passed through to @process
 * @process:             callback invoked for every event that is not
 *                       filtered out; a negative return aborts the snapshot
 * @user_requested_cpus: if not NULL, only events of cpus in this map are
 *                       delivered
 *
 * Only format version 15 is understood. Returns 0 on success and -1 on any
 * failure (file missing, unsupported version, malformed input, allocation
 * failure, or an error reported by @process).
 */
int perf_event__synthesize_schedstat(const struct perf_tool *tool,
				     perf_event__handler_t process,
				     struct perf_cpu_map *user_requested_cpus)
{
	char *line = NULL, path[PATH_MAX];
	size_t line_len = 0;
	char bf[BUFSIZ];
	__u64 timestamp;
	__u64 cpu = -1;
	__u16 version;
	struct io io;
	int ret = -1;
	int ch;

	snprintf(path, PATH_MAX, "%s/schedstat", procfs__mountpoint());
	io.fd = open(path, O_RDONLY, 0);
	if (io.fd < 0) {
		pr_err("Failed to open %s. Possibly CONFIG_SCHEDSTAT is disabled.\n", path);
		return -1;
	}

	io__init(&io, io.fd, bf, sizeof(bf));

	/* First line: "version <N>". */
	if (io__getline(&io, &line, &line_len) < 0 || !line_len)
		goto out;

	if (!strcmp(line, "version 15\n")) {
		version = 15;
	} else {
		/* Skip the "version " prefix only if it is really there. */
		pr_err("Unsupported %s version: %s", path,
		       strncmp(line, "version ", 8) ? line : line + 8);
		goto out;
	}

	/* Second line: "timestamp <N>". */
	if (io__getline(&io, &line, &line_len) < 0 || !line_len)
		goto out;

	if (strncmp(line, "timestamp ", 10)) {
		pr_err("Malformed timestamp line in %s\n", path);
		goto out;
	}
	/* strtoull(): the value may not fit a signed long on 32-bit hosts. */
	timestamp = strtoull(line + 10, NULL, 10);

	/*
	 * FIXME: Can be optimized a bit by not synthesizing domain samples
	 * for filtered out cpus.
	 */
	for (ch = io__get_char(&io); !io.eof; ch = io__get_char(&io)) {
		/* Per iteration, so a freed event can never be seen again. */
		union perf_event *event = NULL;
		struct perf_cpu this_cpu;
		int err = 0;

		if (ch == 'c') {
			event = __synthesize_schedstat_cpu(&io, version,
							   &cpu, timestamp);
		} else if (ch == 'd') {
			event = __synthesize_schedstat_domain(&io, version,
							      cpu, timestamp);
		}
		/* Parse/allocation failure, or a line of unknown kind. */
		if (!event)
			goto out;

		/* Domain lines inherit the cpu of the preceding cpu line. */
		this_cpu.cpu = cpu;

		if (!user_requested_cpus || perf_cpu_map__has(user_requested_cpus, this_cpu))
			err = process(tool, event, NULL, NULL);

		/* Release the event whether it was delivered or filtered out. */
		free(event);
		if (err < 0)
			goto out;
	}

	ret = 0;

out:
	free(line);
	close(io.fd);
	return ret;
}

View File

@ -128,4 +128,7 @@ int perf_event__synthesize_for_pipe(const struct perf_tool *tool,
struct perf_data *data,
perf_event__handler_t process);
int perf_event__synthesize_schedstat(const struct perf_tool *tool,
perf_event__handler_t process,
struct perf_cpu_map *user_requested_cpu);
#endif // __PERF_SYNTHETIC_EVENTS_H

View File

@ -253,7 +253,25 @@ static int perf_event__process_bpf_metadata_stub(const struct perf_tool *tool __
{
if (dump_trace)
perf_event__fprintf_bpf_metadata(event, stdout);
dump_printf(": unhandled!\n");
return 0;
}
/*
 * Default handler for PERF_RECORD_SCHEDSTAT_CPU events, installed by
 * perf_tool__init(): prints the raw record when dump_trace is set and
 * otherwise ignores the event. Always returns 0.
 */
static int process_schedstat_cpu_stub(const struct perf_tool *tool __maybe_unused,
				      struct perf_session *perf_session __maybe_unused,
				      union perf_event *event)
{
	FILE *out = stdout;

	if (dump_trace) {
		perf_event__fprintf_schedstat_cpu(event, out);
	}
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default handler for PERF_RECORD_SCHEDSTAT_DOMAIN events, installed by
 * perf_tool__init(): prints the raw record when dump_trace is set and
 * otherwise ignores the event. Always returns 0.
 */
static int process_schedstat_domain_stub(const struct perf_tool *tool __maybe_unused,
					 struct perf_session *perf_session __maybe_unused,
					 union perf_event *event)
{
	FILE *out = stdout;

	if (dump_trace) {
		perf_event__fprintf_schedstat_domain(event, out);
	}
	dump_printf(": unhandled!\n");

	return 0;
}
@ -317,6 +335,8 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
#endif
tool->finished_init = process_event_op2_stub;
tool->bpf_metadata = perf_event__process_bpf_metadata_stub;
tool->schedstat_cpu = process_schedstat_cpu_stub;
tool->schedstat_domain = process_schedstat_domain_stub;
}
bool perf_tool__compressed_is_stub(const struct perf_tool *tool)

View File

@ -81,7 +81,9 @@ struct perf_tool {
stat_round,
feature,
finished_init,
bpf_metadata;
bpf_metadata,
schedstat_cpu,
schedstat_domain;
event_op4 compressed;
event_op3 auxtrace;
bool ordered_events;