From c69df06e4e26e50611190ce04eab92c5cc261b61 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Mar 2026 12:28:21 +0100 Subject: [PATCH] perf/core: Fix deadlock in perf_mmap() failure path Ian noted that commit 77de62ad3de3 ("perf/core: Fix refcount bug and potential UAF in perf_mmap") would cause a deadlock due to event->mmap_mutex recursion. This happens because we're now calling perf_mmap_close() under mmap_mutex, while that function itself can also take mmap_mutex. Solve this by noting that perf_mmap_close() is far more complicated than we need at this particular point, since it deals with scenarios that cannot happen in this particular case. Replace the call to perf_mmap_close() with a very narrow undo for the case of first-exposure. If this is not the first mmap(), there is no race and it is fine to drop the lock and call perf_mmap_close() to handle the more complicated scenarios. Note: move the rb->mmap_user (namespace) handling into the rb init/free code such that it does not complicate the mmap handling. 
Fixes: 77de62ad3de3 ("perf/core: Fix refcount bug and potential UAF in perf_mmap") Reported-by: Ian Rogers Closes: https://patch.msgid.link/CAP-5%3DfVJyVMZw%3DDqP53Kxg58nUmJ_0bxoaeOKAbC03BVc11HaA%40mail.gmail.com Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260326112821.GK3738786@noisy.programming.kicks-ass.net --- kernel/events/core.c | 70 +++++++++++++++++++++++++++++-------- kernel/events/internal.h | 1 + kernel/events/ring_buffer.c | 2 ++ 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6d1f8bad7e1c..7935d5663944 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) } static void perf_pmu_output_stop(struct perf_event *event); +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb); /* * A buffer can be mmap()ed multiple times; either directly through the same @@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma) mapped_f unmapped = get_mapped(event, event_unmapped); struct perf_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; - int mmap_locked = rb->mmap_locked; - unsigned long size = perf_data_size(rb); bool detach_rest = false; /* FIXIES vs perf_pmu_unregister() */ @@ -7117,11 +7116,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) * Aside from that, this buffer is 'fully' detached and unmapped, * undo the VM accounting. 
*/ - - atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked, - &mmap_user->locked_vm); - atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); - free_uid(mmap_user); + perf_mmap_unaccount(vma, rb); out_put: ring_buffer_put(rb); /* could be last */ @@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long atomic64_add(extra, &vma->vm_mm->pinned_vm); } +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb) +{ + struct user_struct *user = rb->mmap_user; + + atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked, + &user->locked_vm); + atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm); +} + static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, unsigned long nr_pages) { @@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, if (!rb) return -ENOMEM; - refcount_set(&rb->mmap_count, 1); - rb->mmap_user = get_current_user(); rb->mmap_locked = extra; ring_buffer_attach(event, rb); @@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) mapped(event, vma->vm_mm); /* - * Try to map it into the page table. On fail, invoke - * perf_mmap_close() to undo the above, as the callsite expects - * full cleanup in this case and therefore does not invoke - * vmops::close(). + * Try to map it into the page table. On fail undo the above, + * as the callsite expects full cleanup in this case and + * therefore does not invoke vmops::close(). */ ret = map_range(event->rb, vma); - if (ret) - perf_mmap_close(vma); + if (likely(!ret)) + return 0; + + /* Error path */ + + /* + * If this is the first mmap(), then event->mmap_count should + * be stable at 1. It is only modified by: + * perf_mmap_{open,close}() and perf_mmap(). 
+ * + * The former are not possible because this mmap() hasn't been + * successful yet, and the latter is serialized by + * event->mmap_mutex which we still hold (note that mmap_lock + * is not strictly sufficient here, because the event fd can + * be passed to another process through trivial means like + * fork(), leading to concurrent mmap() from different mm). + * + * Make sure to remove event->rb before releasing + * event->mmap_mutex, such that any concurrent mmap() will not + * attempt to use this failed buffer. + */ + if (refcount_read(&event->mmap_count) == 1) { + /* + * Minimal perf_mmap_close(); there can't be AUX or + * other events on account of this being the first. + */ + mapped = get_mapped(event, event_unmapped); + if (mapped) + mapped(event, vma->vm_mm); + perf_mmap_unaccount(vma, event->rb); + ring_buffer_attach(event, NULL); /* drops last rb->refcount */ + refcount_set(&event->mmap_count, 0); + return ret; + } + + /* + * Otherwise this is an already existing buffer, and there is + * no race vs first exposure, so fall-through and call + * perf_mmap_close(). + */ } + perf_mmap_close(vma); return ret; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index d9cc57083091..c03c4f2eea57 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head) struct perf_buffer *rb; rb = container_of(rcu_head, struct perf_buffer, rcu_head); + free_uid(rb->mmap_user); rb_free(rb); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 3e7de2661417..9fe92161715e 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) rb->paused = 1; mutex_init(&rb->aux_mutex); + rb->mmap_user = get_current_user(); + refcount_set(&rb->mmap_count, 1); } void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)