Merge tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events fixes from Ingo Molnar:

 - Fix deadlock in the perf_mmap() failure path (Peter Zijlstra)

 - Intel ACR (Auto Counter Reload) fixes (Dapeng Mi):
     - Fix validation and configuration of ACR masks
     - Fix ACR rescheduling bug causing stale masks
     - Disable the PMI for self-reloaded ACR events
     - Enable ACR on the Panther Cove uarch too

* tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Enable auto counter reload for DMR
  perf/x86/intel: Disable PMI for self-reloaded ACR events
  perf/x86/intel: Always reprogram ACR events to prevent stale masks
  perf/x86/intel: Improve validation and configuration of ACR masks
  perf/core: Fix deadlock in perf_mmap() failure path
commit e5cf0260a7
Linus Torvalds, 2026-05-08 19:39:18 -07:00
6 changed files with 115 additions and 31 deletions
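For context on the ACR changes below: an Auto Counter Reload group ties several counters together so that selected counters are reloaded in hardware when other members of the same group overflow. User space describes this per event through attr.config2, interpreted as a bitmask over positions within the group; intel_pmu_acr_late_setup() (patched below) translates that into a physical counter mask. The following userspace sketch is illustrative only; the generic event encodings, which group members must carry a mask, and whether a given PMU accepts the group are assumptions, not part of this merge.

/*
 * Illustrative only: create a two-event group where the second event's
 * counter is reloaded whenever group member 0 (the leader) overflows.
 * The config2 interpretation follows intel_pmu_acr_late_setup() below.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr leader = { 0 }, member = { 0 };
	int lfd, mfd;

	leader.size          = sizeof(leader);
	leader.type          = PERF_TYPE_HARDWARE;
	leader.config        = PERF_COUNT_HW_CPU_CYCLES;
	leader.sample_period = 1000000;

	member.size    = sizeof(member);
	member.type    = PERF_TYPE_HARDWARE;
	member.config  = PERF_COUNT_HW_INSTRUCTIONS;
	member.config2 = 0x1;	/* bit 0: reload me when group member 0 overflows (assumed ABI) */

	lfd = perf_event_open(&leader, 0, -1, -1, 0);
	if (lfd < 0) {
		perror("perf_event_open(leader)");
		return 1;
	}
	mfd = perf_event_open(&member, 0, -1, lfd, 0);
	if (mfd < 0)
		perror("perf_event_open(member)");

	/* ... mmap the leader and consume samples as usual ... */
	close(mfd);
	close(lfd);
	return 0;
}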


@@ -1294,13 +1294,16 @@ int x86_perf_rdpmc_index(struct perf_event *event)
 	return event->hw.event_base_rdpmc;
 }
 
-static inline int match_prev_assignment(struct hw_perf_event *hwc,
+static inline int match_prev_assignment(struct perf_event *event,
 					struct cpu_hw_events *cpuc,
 					int i)
 {
+	struct hw_perf_event *hwc = &event->hw;
+
 	return hwc->idx == cpuc->assign[i] &&
-	       hwc->last_cpu == smp_processor_id() &&
-	       hwc->last_tag == cpuc->tags[i];
+	       hwc->last_cpu == smp_processor_id() &&
+	       hwc->last_tag == cpuc->tags[i] &&
+	       !is_acr_event_group(event);
 }
 
 static void x86_pmu_start(struct perf_event *event, int flags);
@@ -1346,7 +1349,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 		 * - no other event has used the counter since
 		 */
 		if (hwc->idx == -1 ||
-		    match_prev_assignment(hwc, cpuc, i))
+		    match_prev_assignment(event, cpuc, i))
 			continue;
 
 		/*
@@ -1367,7 +1370,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 		event = cpuc->event_list[i];
 		hwc = &event->hw;
 
-		if (!match_prev_assignment(hwc, cpuc, i))
+		if (!match_prev_assignment(event, cpuc, i))
 			x86_assign_hw_event(event, cpuc, i);
 		else if (i < n_running)
 			continue;


@@ -3118,11 +3118,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 	intel_set_masks(event, idx);
 
 	/*
-	 * Enable IRQ generation (0x8), if not PEBS,
-	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
-	 * if requested:
+	 * Enable IRQ generation (0x8), if not PEBS or self-reloaded
+	 * ACR event, and enable ring-3 counting (0x2) and ring-0
+	 * counting (0x1) if requested:
 	 */
-	if (!event->attr.precise_ip)
+	if (!event->attr.precise_ip && !is_acr_self_reload_event(event))
 		bits |= INTEL_FIXED_0_ENABLE_PMI;
 	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
 		bits |= INTEL_FIXED_0_USER;
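The 0x8/0x2/0x1 values in the comment above are the per-counter control bits in IA32_FIXED_CTR_CTRL. A minimal standalone model of how that nibble is assembled (the helper and its arguments are illustrative, not kernel code; the field layout follows the SDM):

/*
 * Illustrative model of the fixed-counter control nibble: bit 0 counts
 * ring 0, bit 1 counts ring 3, bit 3 raises the PMI on overflow. Each
 * fixed counter owns a 4-bit field in IA32_FIXED_CTR_CTRL.
 */
#include <stdbool.h>
#include <stdint.h>

#define FIXED_CTRL_OS	0x1	/* ring-0 counting (0x1 in the comment above) */
#define FIXED_CTRL_USR	0x2	/* ring-3 counting (0x2 in the comment above) */
#define FIXED_CTRL_PMI	0x8	/* overflow interrupt (0x8 in the comment above) */

static uint64_t fixed_ctrl_bits(int idx, bool user, bool kernel, bool want_pmi)
{
	uint64_t bits = 0;

	if (user)
		bits |= FIXED_CTRL_USR;
	if (kernel)
		bits |= FIXED_CTRL_OS;
	if (want_pmi)		/* skipped for PEBS and self-reloaded ACR events */
		bits |= FIXED_CTRL_PMI;

	return bits << (idx * 4);	/* 4 control bits per fixed counter */
}

For example, a non-PEBS kernel+user event on fixed counter 1 yields 0xb0.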
@@ -3306,6 +3306,15 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		intel_set_masks(event, idx);
 		static_call_cond(intel_pmu_enable_acr_event)(event);
 		static_call_cond(intel_pmu_enable_event_ext)(event);
+		/*
+		 * For self-reloaded ACR event, don't enable PMI since
+		 * HW won't set overflow bit in GLOBAL_STATUS. Otherwise,
+		 * the PMI would be recognized as a suspicious NMI.
+		 */
+		if (is_acr_self_reload_event(event))
+			hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+		else if (!event->attr.precise_ip)
+			hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
@@ -3332,23 +3341,41 @@ static void intel_pmu_enable_event(struct perf_event *event)
 static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
 {
 	struct perf_event *event, *leader;
-	int i, j, idx;
+	int i, j, k, bit, idx;
 
+	/*
+	 * FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only).
+	 * Disabling an ACR event causes bit-shifting errors in the acr_mask of
+	 * remaining group members. As ACR sampling requires all events to be active,
+	 * this limitation is acceptable for now. Revisit if independent event toggling
+	 * is required.
+	 */
 	for (i = 0; i < cpuc->n_events; i++) {
 		leader = cpuc->event_list[i];
 		if (!is_acr_event_group(leader))
 			continue;
 
-		/* The ACR events must be contiguous. */
+		/* Find the last event of the ACR group. */
 		for (j = i; j < cpuc->n_events; j++) {
 			event = cpuc->event_list[j];
 			if (event->group_leader != leader->group_leader)
 				break;
-			for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
-				if (i + idx >= cpuc->n_events ||
-				    !is_acr_event_group(cpuc->event_list[i + idx]))
-					return;
-				__set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
-			}
 		}
+
+		/*
+		 * Translate the user-space ACR mask (attr.config2) into the physical
+		 * counter bitmask (hw.config1) for each ACR event in the group.
+		 * NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config().
+		 */
+		for (k = i; k < j; k++) {
+			event = cpuc->event_list[k];
+			event->hw.config1 = 0;
+
+			for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
+				idx = i + bit;
+				/* Event index of ACR group must locate in [i, j). */
+				if (idx >= j || !is_acr_event_group(cpuc->event_list[idx]))
+					continue;
+				__set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1);
+			}
+		}
 		i = j - 1;
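The rewritten loop above walks one ACR group at a time: attr.config2 holds group-relative bit positions, and each set bit is translated to the physical counter assigned to that group member, accumulated in hw.config1. A standalone model with made-up numbers (the array names only mirror the kernel structures):

/*
 * Illustrative model of the config2 -> config1 translation done by
 * intel_pmu_acr_late_setup() above; not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_EVENTS 8

int main(void)
{
	/* Three ACR group members, already scheduled on physical counters. */
	int assign[MAX_EVENTS]       = { 0, 3, 5 };		/* like cpuc->assign[]     */
	uint64_t config2[MAX_EVENTS] = { 0x0, 0x3, 0x2 };	/* group-relative masks    */
	uint64_t config1[MAX_EVENTS] = { 0 };			/* physical masks (output) */
	int n = 3, i = 0, j = n;				/* the group spans [i, j)  */

	for (int k = i; k < j; k++) {
		config1[k] = 0;
		for (int bit = 0; bit < 64; bit++) {
			if (!(config2[k] & (1ULL << bit)))
				continue;
			int idx = i + bit;
			if (idx >= j)	/* bit points outside the group: ignore it */
				continue;
			config1[k] |= 1ULL << assign[idx];
		}
	}

	for (int k = 0; k < n; k++)
		printf("event %d: config1 = %#llx\n", k, (unsigned long long)config1[k]);
	return 0;
}

With these inputs, event 1 ends up with config1 = 0x9 (physical counters 0 and 3); because its own counter 3 is included, it is exactly the kind of self-reload event for which the hunks above suppress the PMI.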
@@ -7504,6 +7531,7 @@ static __always_inline void intel_pmu_init_pnc(struct pmu *pmu)
 	hybrid(pmu, event_constraints) = intel_pnc_event_constraints;
 	hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints;
 	hybrid(pmu, extra_regs) = intel_pnc_extra_regs;
+	static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
 }
 
 static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
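The one-line addition registers intel_pmu_enable_acr as the handler behind the intel_pmu_enable_acr_event static call for this PMU, which intel_pmu_enable_event() invokes through static_call_cond(). A sketch of that pattern with made-up names, assuming a kernel build context:

/*
 * Sketch of the static_call wiring used above; the widget_* names are
 * hypothetical. The call defaults to NULL and is a no-op through
 * static_call_cond() until an init path patches in a real handler.
 */
#include <linux/static_call.h>

struct widget;

static void widget_enable_feature(struct widget *w);

DEFINE_STATIC_CALL_NULL(widget_feature_hook, widget_enable_feature);

static void widget_enable_feature(struct widget *w)
{
	/* feature-specific programming would go here */
}

static void widget_enable(struct widget *w)
{
	/* No-op on hardware that never registered a handler. */
	static_call_cond(widget_feature_hook)(w);
}

static void widget_init_with_feature(void)
{
	/* One-time wiring at init, like the intel_pmu_init_pnc() hunk above. */
	static_call_update(widget_feature_hook, widget_enable_feature);
}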


@@ -137,6 +137,16 @@ static inline bool is_acr_event_group(struct perf_event *event)
 	return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
 }
 
+static inline bool is_acr_self_reload_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->idx < 0)
+		return false;
+
+	return test_bit(hwc->idx, (unsigned long *)&hwc->config1);
+}
+
 struct amd_nb {
 	int nb_id;	/* NorthBridge id */
 	int refcnt;	/* reference count */


@@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
 
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
@@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
-	int mmap_locked = rb->mmap_locked;
-	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;
 
 	/* FIXIES vs perf_pmu_unregister() */
@@ -7117,11 +7116,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 * Aside from that, this buffer is 'fully' detached and unmapped,
 	 * undo the VM accounting.
 	 */
-
-	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
-			&mmap_user->locked_vm);
-	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
-	free_uid(mmap_user);
+	perf_mmap_unaccount(vma, rb);
 
 out_put:
 	ring_buffer_put(rb); /* could be last */
@@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long extra)
 	atomic64_add(extra, &vma->vm_mm->pinned_vm);
 }
 
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
+{
+	struct user_struct *user = rb->mmap_user;
+
+	atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
+			&user->locked_vm);
+	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
+}
+
 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 			unsigned long nr_pages)
 {
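perf_mmap_unaccount() undoes perf_mmap_account() (shown in context above): the pages that fit the per-user locked-memory allowance were charged to user->locked_vm, and the excess (remembered in rb->mmap_locked) to mm->pinned_vm. A back-of-the-envelope model with made-up numbers:

/* Illustrative arithmetic only; the variable names mirror the kernel code. */
#include <assert.h>

int main(void)
{
	long total_pages = 8 + 1;			/* data pages + control page              */
	long user_extra  = 6;				/* fits the per-user allowance (assumed)  */
	long extra       = total_pages - user_extra;	/* excess, charged to mm->pinned_vm       */
	long locked_vm   = 0, pinned_vm = 0;

	/* perf_mmap_account(vma, user_extra, extra) */
	locked_vm += user_extra;
	pinned_vm += extra;

	/* perf_mmap_unaccount(vma, rb), with rb->mmap_locked == extra */
	locked_vm -= total_pages - extra;	/* (perf_data_size >> PAGE_SHIFT) + 1 - mmap_locked */
	pinned_vm -= extra;

	assert(locked_vm == 0 && pinned_vm == 0);
	return 0;
}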
@@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 		if (!rb)
 			return -ENOMEM;
 
-		refcount_set(&rb->mmap_count, 1);
-		rb->mmap_user = get_current_user();
 		rb->mmap_locked = extra;
 
 		ring_buffer_attach(event, rb);
@@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		mapped(event, vma->vm_mm);
 
 	/*
-	 * Try to map it into the page table. On fail, invoke
-	 * perf_mmap_close() to undo the above, as the callsite expects
-	 * full cleanup in this case and therefore does not invoke
-	 * vmops::close().
+	 * Try to map it into the page table. On fail undo the above,
+	 * as the callsite expects full cleanup in this case and
+	 * therefore does not invoke vmops::close().
 	 */
 	ret = map_range(event->rb, vma);
-	if (ret)
-		perf_mmap_close(vma);
+	if (likely(!ret))
+		return 0;
+
+	/* Error path */
+
+	/*
+	 * If this is the first mmap(), then event->mmap_count should
+	 * be stable at 1. It is only modified by:
+	 * perf_mmap_{open,close}() and perf_mmap().
+	 *
+	 * The former are not possible because this mmap() hasn't been
+	 * successful yet, and the latter is serialized by
+	 * event->mmap_mutex which we still hold (note that mmap_lock
+	 * is not strictly sufficient here, because the event fd can
+	 * be passed to another process through trivial means like
+	 * fork(), leading to concurrent mmap() from different mm).
+	 *
+	 * Make sure to remove event->rb before releasing
+	 * event->mmap_mutex, such that any concurrent mmap() will not
+	 * attempt use this failed buffer.
+	 */
+	if (refcount_read(&event->mmap_count) == 1) {
+		/*
+		 * Minimal perf_mmap_close(); there can't be AUX or
+		 * other events on account of this being the first.
+		 */
+		mapped = get_mapped(event, event_unmapped);
+		if (mapped)
+			mapped(event, vma->vm_mm);
+
+		perf_mmap_unaccount(vma, event->rb);
+		ring_buffer_attach(event, NULL); /* drops last rb->refcount */
+		refcount_set(&event->mmap_count, 0);
+		return ret;
+	}
+
+	/*
+	 * Otherwise this is an already existing buffer, and there is
+	 * no race vs first exposure, so fall-through and call
+	 * perf_mmap_close().
+	 */
+	perf_mmap_close(vma);
 
 	return ret;
 }
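The comment in the error path above notes that mmap_lock alone cannot serialize these mappings, because the event fd can reach another process and then be mmap()ed from a different mm. A small userspace illustration of that situation, using a software dummy event (not part of the patch):

/*
 * Illustration only: after fork(), parent and child mmap() the same perf
 * event fd from two different mm's, so only event->mmap_mutex serializes
 * the two mappings.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	long psz = sysconf(_SC_PAGESIZE);
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size     = sizeof(attr);
	attr.type     = PERF_TYPE_SOFTWARE;
	attr.config   = PERF_COUNT_SW_DUMMY;
	attr.disabled = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	if (fork() == 0) {	/* child: separate mm, same underlying perf event */
		void *p = mmap(NULL, (1 + 8) * psz, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);
		printf("child  mmap: %p\n", p);
		_exit(0);
	}

	void *p = mmap(NULL, (1 + 8) * psz, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	printf("parent mmap: %p\n", p);
	wait(NULL);
	return 0;
}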


@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	struct perf_buffer *rb;
 
 	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
+	free_uid(rb->mmap_user);
 	rb_free(rb);
 }


@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 
 	mutex_init(&rb->aux_mutex);
+	rb->mmap_user = get_current_user();
+	refcount_set(&rb->mmap_count, 1);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)