mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
Miscellaneous perf events fixes:
- Fix deadlock in the perf_mmap() failure path (Peter Zijlstra)
- Intel ACR (Auto Counter Reload) fixes (Dapeng Mi):
    - Fix validation and configuration of ACR masks
    - Fix ACR rescheduling bug causing stale masks
    - Disable the PMI on ACR-enabled hardware
    - Enable ACR on Panther Cove uarch too
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-----BEGIN PGP SIGNATURE-----
iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmn+kB0RHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1ib2RAAkh19iPRq7tHCXvDYJEHztpAuioyaKznw
57pNlzG+/K/gg/yZ7uoCtXvogEHgDtart9ZtH7CVtZQgky6YfdiBq0g0pNSdOoY5
O4IB0ZIXIu/FOh1+Q1k3Md5MeuxUz1Jp21Wq+JNK6VkWwfq+oCZ3XJK06+2C45wI
uUePaEMFZn7VX9WOToZJQZME/+5yQvrgOq+D6gBs+y3UJO5u6kpdoley6fPXRtYV
hyfBYiutJlcV1dJC9g7Dc6CHrBkaolFTKsRi2RjD658fHmUUMCubsn6lSG9UJqiZ
CWtNMHJ/k1WBLuPLUaZBa3W0s+mUZ+0E6W3nLRHC2ORRQhAnQKeoDb2IWlVhYTdB
NmyABPqjwvGfgidMh39aMt8GS4lFZBXGozVNWTZprN56U/jYH/Ol4cJNLJ9Ez5yk
fzIkljCc5L/ZmxmqjJNBvmJtTpAt/FhN0qKT/k9jksISFE24bzZ0oRg3t051OyXs
Mndldyl/2EFHA2PBIN2phISTVWh5lewYNBaK0SBbx77DX6NzMdevhdGAvw2cRVT/
BJvqj+OeBfiaGBNb/lAIsoZCnuMClQi2t4jlKGkmN3n9hbgPyPAsz/WJRDLr9GZ+
cqQgh7fL80HoqZTfV7tWxKTkDK3AciXXZE+8ntBpGC6CgMmgsJqLmxc60Jzfh2OO
qGXcodOISag=
=WNs1
-----END PGP SIGNATURE-----
Merge tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events fixes from Ingo Molnar:
- Fix deadlock in the perf_mmap() failure path (Peter Zijlstra)
- Intel ACR (Auto Counter Reload) fixes (Dapeng Mi):
    - Fix validation and configuration of ACR masks
    - Fix ACR rescheduling bug causing stale masks
    - Disable the PMI on ACR-enabled hardware
    - Enable ACR on Panther Cove uarch too
* tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel: Enable auto counter reload for DMR
perf/x86/intel: Disable PMI for self-reloaded ACR events
perf/x86/intel: Always reprogram ACR events to prevent stale masks
perf/x86/intel: Improve validation and configuration of ACR masks
perf/core: Fix deadlock in perf_mmap() failure path
This commit is contained in:
commit
e5cf0260a7
|
|
@ -1294,13 +1294,16 @@ int x86_perf_rdpmc_index(struct perf_event *event)
|
|||
return event->hw.event_base_rdpmc;
|
||||
}
|
||||
|
||||
static inline int match_prev_assignment(struct hw_perf_event *hwc,
|
||||
static inline int match_prev_assignment(struct perf_event *event,
|
||||
struct cpu_hw_events *cpuc,
|
||||
int i)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
return hwc->idx == cpuc->assign[i] &&
|
||||
hwc->last_cpu == smp_processor_id() &&
|
||||
hwc->last_tag == cpuc->tags[i];
|
||||
hwc->last_cpu == smp_processor_id() &&
|
||||
hwc->last_tag == cpuc->tags[i] &&
|
||||
!is_acr_event_group(event);
|
||||
}
|
||||
|
||||
static void x86_pmu_start(struct perf_event *event, int flags);
|
||||
|
|
@ -1346,7 +1349,7 @@ static void x86_pmu_enable(struct pmu *pmu)
|
|||
* - no other event has used the counter since
|
||||
*/
|
||||
if (hwc->idx == -1 ||
|
||||
match_prev_assignment(hwc, cpuc, i))
|
||||
match_prev_assignment(event, cpuc, i))
|
||||
continue;
|
||||
|
||||
/*
|
||||
|
|
@ -1367,7 +1370,7 @@ static void x86_pmu_enable(struct pmu *pmu)
|
|||
event = cpuc->event_list[i];
|
||||
hwc = &event->hw;
|
||||
|
||||
if (!match_prev_assignment(hwc, cpuc, i))
|
||||
if (!match_prev_assignment(event, cpuc, i))
|
||||
x86_assign_hw_event(event, cpuc, i);
|
||||
else if (i < n_running)
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -3118,11 +3118,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
|
|||
intel_set_masks(event, idx);
|
||||
|
||||
/*
|
||||
* Enable IRQ generation (0x8), if not PEBS,
|
||||
* and enable ring-3 counting (0x2) and ring-0 counting (0x1)
|
||||
* if requested:
|
||||
* Enable IRQ generation (0x8), if not PEBS or self-reloaded
|
||||
* ACR event, and enable ring-3 counting (0x2) and ring-0
|
||||
* counting (0x1) if requested:
|
||||
*/
|
||||
if (!event->attr.precise_ip)
|
||||
if (!event->attr.precise_ip && !is_acr_self_reload_event(event))
|
||||
bits |= INTEL_FIXED_0_ENABLE_PMI;
|
||||
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
|
||||
bits |= INTEL_FIXED_0_USER;
|
||||
|
|
@ -3306,6 +3306,15 @@ static void intel_pmu_enable_event(struct perf_event *event)
|
|||
intel_set_masks(event, idx);
|
||||
static_call_cond(intel_pmu_enable_acr_event)(event);
|
||||
static_call_cond(intel_pmu_enable_event_ext)(event);
|
||||
/*
|
||||
* For self-reloaded ACR event, don't enable PMI since
|
||||
* HW won't set overflow bit in GLOBAL_STATUS. Otherwise,
|
||||
* the PMI would be recognized as a suspicious NMI.
|
||||
*/
|
||||
if (is_acr_self_reload_event(event))
|
||||
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
||||
else if (!event->attr.precise_ip)
|
||||
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
|
||||
__x86_pmu_enable_event(hwc, enable_mask);
|
||||
break;
|
||||
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
|
||||
|
|
@ -3332,23 +3341,41 @@ static void intel_pmu_enable_event(struct perf_event *event)
|
|||
static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
struct perf_event *event, *leader;
|
||||
int i, j, idx;
|
||||
int i, j, k, bit, idx;
|
||||
|
||||
/*
|
||||
* FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only).
|
||||
* Disabling an ACR event causes bit-shifting errors in the acr_mask of
|
||||
* remaining group members. As ACR sampling requires all events to be active,
|
||||
* this limitation is acceptable for now. Revisit if independent event toggling
|
||||
* is required.
|
||||
*/
|
||||
for (i = 0; i < cpuc->n_events; i++) {
|
||||
leader = cpuc->event_list[i];
|
||||
if (!is_acr_event_group(leader))
|
||||
continue;
|
||||
|
||||
/* The ACR events must be contiguous. */
|
||||
/* Find the last event of the ACR group. */
|
||||
for (j = i; j < cpuc->n_events; j++) {
|
||||
event = cpuc->event_list[j];
|
||||
if (event->group_leader != leader->group_leader)
|
||||
break;
|
||||
for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
|
||||
if (i + idx >= cpuc->n_events ||
|
||||
!is_acr_event_group(cpuc->event_list[i + idx]))
|
||||
return;
|
||||
__set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate the user-space ACR mask (attr.config2) into the physical
|
||||
* counter bitmask (hw.config1) for each ACR event in the group.
|
||||
* NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config().
|
||||
*/
|
||||
for (k = i; k < j; k++) {
|
||||
event = cpuc->event_list[k];
|
||||
event->hw.config1 = 0;
|
||||
for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
|
||||
idx = i + bit;
|
||||
/* Event index of ACR group must be located in [i, j). */
|
||||
if (idx >= j || !is_acr_event_group(cpuc->event_list[idx]))
|
||||
continue;
|
||||
__set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1);
|
||||
}
|
||||
}
|
||||
i = j - 1;
|
||||
|
|
@ -7504,6 +7531,7 @@ static __always_inline void intel_pmu_init_pnc(struct pmu *pmu)
|
|||
hybrid(pmu, event_constraints) = intel_pnc_event_constraints;
|
||||
hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints;
|
||||
hybrid(pmu, extra_regs) = intel_pnc_extra_regs;
|
||||
static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
|
||||
}
|
||||
|
||||
static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
|
||||
|
|
|
|||
|
|
@ -137,6 +137,16 @@ static inline bool is_acr_event_group(struct perf_event *event)
|
|||
return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
|
||||
}
|
||||
|
||||
static inline bool is_acr_self_reload_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (hwc->idx < 0)
|
||||
return false;
|
||||
|
||||
return test_bit(hwc->idx, (unsigned long *)&hwc->config1);
|
||||
}
|
||||
|
||||
struct amd_nb {
|
||||
int nb_id; /* NorthBridge id */
|
||||
int refcnt; /* reference count */
|
||||
|
|
|
|||
|
|
@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
|
|||
}
|
||||
|
||||
static void perf_pmu_output_stop(struct perf_event *event);
|
||||
static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
|
||||
|
||||
/*
|
||||
* A buffer can be mmap()ed multiple times; either directly through the same
|
||||
|
|
@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
|
|||
mapped_f unmapped = get_mapped(event, event_unmapped);
|
||||
struct perf_buffer *rb = ring_buffer_get(event);
|
||||
struct user_struct *mmap_user = rb->mmap_user;
|
||||
int mmap_locked = rb->mmap_locked;
|
||||
unsigned long size = perf_data_size(rb);
|
||||
bool detach_rest = false;
|
||||
|
||||
/* FIXIES vs perf_pmu_unregister() */
|
||||
|
|
@ -7117,11 +7116,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
|
|||
* Aside from that, this buffer is 'fully' detached and unmapped,
|
||||
* undo the VM accounting.
|
||||
*/
|
||||
|
||||
atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
|
||||
&mmap_user->locked_vm);
|
||||
atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
|
||||
free_uid(mmap_user);
|
||||
perf_mmap_unaccount(vma, rb);
|
||||
|
||||
out_put:
|
||||
ring_buffer_put(rb); /* could be last */
|
||||
|
|
@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
|
|||
atomic64_add(extra, &vma->vm_mm->pinned_vm);
|
||||
}
|
||||
|
||||
static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
|
||||
{
|
||||
struct user_struct *user = rb->mmap_user;
|
||||
|
||||
atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
|
||||
&user->locked_vm);
|
||||
atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
|
||||
}
|
||||
|
||||
static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
|
||||
unsigned long nr_pages)
|
||||
{
|
||||
|
|
@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
|
|||
if (!rb)
|
||||
return -ENOMEM;
|
||||
|
||||
refcount_set(&rb->mmap_count, 1);
|
||||
rb->mmap_user = get_current_user();
|
||||
rb->mmap_locked = extra;
|
||||
|
||||
ring_buffer_attach(event, rb);
|
||||
|
|
@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
|
|||
mapped(event, vma->vm_mm);
|
||||
|
||||
/*
|
||||
* Try to map it into the page table. On fail, invoke
|
||||
* perf_mmap_close() to undo the above, as the callsite expects
|
||||
* full cleanup in this case and therefore does not invoke
|
||||
* vmops::close().
|
||||
* Try to map it into the page table. On fail undo the above,
|
||||
* as the callsite expects full cleanup in this case and
|
||||
* therefore does not invoke vmops::close().
|
||||
*/
|
||||
ret = map_range(event->rb, vma);
|
||||
if (ret)
|
||||
perf_mmap_close(vma);
|
||||
if (likely(!ret))
|
||||
return 0;
|
||||
|
||||
/* Error path */
|
||||
|
||||
/*
|
||||
* If this is the first mmap(), then event->mmap_count should
|
||||
* be stable at 1. It is only modified by:
|
||||
* perf_mmap_{open,close}() and perf_mmap().
|
||||
*
|
||||
* The former are not possible because this mmap() hasn't been
|
||||
* successful yet, and the latter is serialized by
|
||||
* event->mmap_mutex which we still hold (note that mmap_lock
|
||||
* is not strictly sufficient here, because the event fd can
|
||||
* be passed to another process through trivial means like
|
||||
* fork(), leading to concurrent mmap() from different mm).
|
||||
*
|
||||
* Make sure to remove event->rb before releasing
|
||||
* event->mmap_mutex, such that any concurrent mmap() will not
|
||||
* attempt to use this failed buffer.
|
||||
*/
|
||||
if (refcount_read(&event->mmap_count) == 1) {
|
||||
/*
|
||||
* Minimal perf_mmap_close(); there can't be AUX or
|
||||
* other events on account of this being the first.
|
||||
*/
|
||||
mapped = get_mapped(event, event_unmapped);
|
||||
if (mapped)
|
||||
mapped(event, vma->vm_mm);
|
||||
perf_mmap_unaccount(vma, event->rb);
|
||||
ring_buffer_attach(event, NULL); /* drops last rb->refcount */
|
||||
refcount_set(&event->mmap_count, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise this is an already existing buffer, and there is
|
||||
* no race vs first exposure, so fall-through and call
|
||||
* perf_mmap_close().
|
||||
*/
|
||||
}
|
||||
|
||||
perf_mmap_close(vma);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
|
|||
struct perf_buffer *rb;
|
||||
|
||||
rb = container_of(rcu_head, struct perf_buffer, rcu_head);
|
||||
free_uid(rb->mmap_user);
|
||||
rb_free(rb);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
|
|||
rb->paused = 1;
|
||||
|
||||
mutex_init(&rb->aux_mutex);
|
||||
rb->mmap_user = get_current_user();
|
||||
refcount_set(&rb->mmap_count, 1);
|
||||
}
|
||||
|
||||
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user