mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 09:04:39 +02:00
perf/x86/lbr: Fix shorter LBRs call stacks for the system-wide mode
In the system-wide mode, LBR callstacks are shorter in comparison to
the per-process mode.
LBR MSRs are reset during a context switch in the system-wide mode. For
the LBR call stack, the LBRs should always be saved/restored during a
context switch.
Use the space in task_struct to save/restore the LBR call stack data.
For a system-wide event, it's unnecessary to update the
lbr_callstack_users for each thread. Add a variable in x86_pmu to
indicate whether the system-wide event is active.
Fixes: 76cb2c617f ("perf/x86/intel: Save/restore LBR stack during context switch")
Reported-by: Andi Kleen <ak@linux.intel.com>
Reported-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Debugged-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250314172700.438923-5-kan.liang@linux.intel.com
This commit is contained in:
parent
d57e94f5b8
commit
3cec9fd035
|
|
@ -422,11 +422,17 @@ static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
|
|||
return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
|
||||
}
|
||||
|
||||
/*
 * Return true when an LBR call-stack user exists for this context: either
 * the per-context counter reached through task_context_opt(ctx) is non-zero,
 * or the x86_pmu-wide lbr_callstack_users counter is non-zero.
 */
static inline bool has_lbr_callstack_users(void *ctx)
|
||||
{
|
||||
return task_context_opt(ctx)->lbr_callstack_users ||
|
||||
x86_pmu.lbr_callstack_users;
|
||||
}
|
||||
|
||||
static void __intel_pmu_lbr_restore(void *ctx)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
|
||||
if (!has_lbr_callstack_users(ctx) ||
|
||||
task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
|
||||
intel_pmu_lbr_reset();
|
||||
return;
|
||||
|
|
@ -503,7 +509,7 @@ static void __intel_pmu_lbr_save(void *ctx)
|
|||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
if (task_context_opt(ctx)->lbr_callstack_users == 0) {
|
||||
if (!has_lbr_callstack_users(ctx)) {
|
||||
task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
|
||||
return;
|
||||
}
|
||||
|
|
@ -543,6 +549,7 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
|
|||
struct task_struct *task, bool sched_in)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct perf_ctx_data *ctx_data;
|
||||
void *task_ctx;
|
||||
|
||||
if (!cpuc->lbr_users)
|
||||
|
|
@ -553,14 +560,18 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
|
|||
* the task was scheduled out, restore the stack. Otherwise flush
|
||||
* the LBR stack.
|
||||
*/
|
||||
task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
|
||||
rcu_read_lock();
|
||||
ctx_data = rcu_dereference(task->perf_ctx_data);
|
||||
task_ctx = ctx_data ? ctx_data->data : NULL;
|
||||
if (task_ctx) {
|
||||
if (sched_in)
|
||||
__intel_pmu_lbr_restore(task_ctx);
|
||||
else
|
||||
__intel_pmu_lbr_save(task_ctx);
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* Since a context switch can flip the address space and LBR entries
|
||||
|
|
@ -589,9 +600,19 @@ void intel_pmu_lbr_add(struct perf_event *event)
|
|||
|
||||
cpuc->br_sel = event->hw.branch_reg.reg;
|
||||
|
||||
if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data)
|
||||
task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++;
|
||||
if (branch_user_callstack(cpuc->br_sel)) {
|
||||
if (event->attach_state & PERF_ATTACH_TASK) {
|
||||
struct task_struct *task = event->hw.target;
|
||||
struct perf_ctx_data *ctx_data;
|
||||
|
||||
rcu_read_lock();
|
||||
ctx_data = rcu_dereference(task->perf_ctx_data);
|
||||
if (ctx_data)
|
||||
task_context_opt(ctx_data->data)->lbr_callstack_users++;
|
||||
rcu_read_unlock();
|
||||
} else
|
||||
x86_pmu.lbr_callstack_users++;
|
||||
}
|
||||
/*
|
||||
* Request pmu::sched_task() callback, which will fire inside the
|
||||
* regular perf event scheduling, so that call will:
|
||||
|
|
@ -665,9 +686,19 @@ void intel_pmu_lbr_del(struct perf_event *event)
|
|||
if (!x86_pmu.lbr_nr)
|
||||
return;
|
||||
|
||||
if (branch_user_callstack(cpuc->br_sel) &&
|
||||
event->pmu_ctx->task_ctx_data)
|
||||
task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;
|
||||
if (branch_user_callstack(cpuc->br_sel)) {
|
||||
if (event->attach_state & PERF_ATTACH_TASK) {
|
||||
struct task_struct *task = event->hw.target;
|
||||
struct perf_ctx_data *ctx_data;
|
||||
|
||||
rcu_read_lock();
|
||||
ctx_data = rcu_dereference(task->perf_ctx_data);
|
||||
if (ctx_data)
|
||||
task_context_opt(ctx_data->data)->lbr_callstack_users--;
|
||||
rcu_read_unlock();
|
||||
} else
|
||||
x86_pmu.lbr_callstack_users--;
|
||||
}
|
||||
|
||||
if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
|
||||
cpuc->lbr_select = 0;
|
||||
|
|
|
|||
|
|
@ -920,6 +920,7 @@ struct x86_pmu {
|
|||
const int *lbr_sel_map; /* lbr_select mappings */
|
||||
int *lbr_ctl_map; /* LBR_CTL mappings */
|
||||
};
|
||||
u64 lbr_callstack_users; /* lbr callstack system wide users */
|
||||
bool lbr_double_abort; /* duplicated lbr aborts */
|
||||
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user