perf/core: Fix deadlock in perf_mmap() failure path

Ian noted that commit 77de62ad3d ("perf/core: Fix refcount bug and
potential UAF in perf_mmap") would cause a deadlock due to
event->mmap_mutex recursion.

This happens because we're now calling perf_mmap_close() under
mmap_mutex, while that function itself can also take mmap_mutex.
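
Schematically, the recursion looks like this; a simplified sketch of the
call chain, with the locking written as a plain mutex_lock() purely for
illustration:

  perf_mmap()
    mutex_lock(&event->mmap_mutex)
    ...
    map_range(event->rb, vma)          // fails
    perf_mmap_close(vma)               // error-path cleanup
      mutex_lock(&event->mmap_mutex)   // already held above -> deadlock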

Solve this by noting that perf_mmap_close() is far more complicated
than is needed at this point, since it deals with scenarios that
cannot happen in this particular case.

Replace the call to perf_mmap_close() with a very narrow undo for the
case of first-exposure. If this is not the first mmap(), there is no
race and it is fine to drop the lock and call perf_mmap_close() to
handle the more complicated scenarios.

Note: move the rb->mmap_user (namespace) handling into the rb
init/free code such that it does not complicate the mmap handling.

Fixes: 77de62ad3d ("perf/core: Fix refcount bug and potential UAF in perf_mmap")
Reported-by: Ian Rogers <irogers@google.com>
Closes: https://patch.msgid.link/CAP-5%3DfVJyVMZw%3DDqP53Kxg58nUmJ_0bxoaeOKAbC03BVc11HaA%40mail.gmail.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260326112821.GK3738786@noisy.programming.kicks-ass.net
commit c69df06e4e (parent 7fd2df204f)
Peter Zijlstra, 2026-03-26 12:28:21 +01:00

3 files changed, 58 insertions(+), 15 deletions(-)

@@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
 
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
@@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
-	struct user_struct *mmap_user = rb->mmap_user;
-	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;
 
 	/* FIXIES vs perf_pmu_unregister() */
@@ -7117,11 +7116,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 * Aside from that, this buffer is 'fully' detached and unmapped,
 	 * undo the VM accounting.
 	 */
-	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
-			&mmap_user->locked_vm);
-	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
-	free_uid(mmap_user);
+	perf_mmap_unaccount(vma, rb);
 
 out_put:
 	ring_buffer_put(rb); /* could be last */
@@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
 	atomic64_add(extra, &vma->vm_mm->pinned_vm);
 }
 
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
+{
+	struct user_struct *user = rb->mmap_user;
+
+	atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
+			&user->locked_vm);
+	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
+}
+
 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
			 unsigned long nr_pages)
 {
@@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 	if (!rb)
 		return -ENOMEM;
 
-	refcount_set(&rb->mmap_count, 1);
-	rb->mmap_user = get_current_user();
 	rb->mmap_locked = extra;
 
 	ring_buffer_attach(event, rb);
@@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 			mapped(event, vma->vm_mm);
 
 		/*
-		 * Try to map it into the page table. On fail, invoke
-		 * perf_mmap_close() to undo the above, as the callsite expects
-		 * full cleanup in this case and therefore does not invoke
-		 * vmops::close().
+		 * Try to map it into the page table. On fail undo the above,
+		 * as the callsite expects full cleanup in this case and
+		 * therefore does not invoke vmops::close().
 		 */
 		ret = map_range(event->rb, vma);
-		if (ret)
-			perf_mmap_close(vma);
+		if (likely(!ret))
+			return 0;
+
+		/* Error path */
+
+		/*
+		 * If this is the first mmap(), then event->mmap_count should
+		 * be stable at 1. It is only modified by:
+		 * perf_mmap_{open,close}() and perf_mmap().
+		 *
+		 * The former are not possible because this mmap() hasn't been
+		 * successful yet, and the latter is serialized by
+		 * event->mmap_mutex which we still hold (note that mmap_lock
+		 * is not strictly sufficient here, because the event fd can
+		 * be passed to another process through trivial means like
+		 * fork(), leading to concurrent mmap() from different mm).
+		 *
+		 * Make sure to remove event->rb before releasing
+		 * event->mmap_mutex, such that any concurrent mmap() will not
+		 * attempt to use this failed buffer.
+		 */
+		if (refcount_read(&event->mmap_count) == 1) {
+			/*
+			 * Minimal perf_mmap_close(); there can't be AUX or
+			 * other events on account of this being the first.
+			 */
+			mapped = get_mapped(event, event_unmapped);
+			if (mapped)
+				mapped(event, vma->vm_mm);
+			perf_mmap_unaccount(vma, event->rb);
+			ring_buffer_attach(event, NULL); /* drops last rb->refcount */
+			refcount_set(&event->mmap_count, 0);
+			return ret;
+		}
+
+		/*
+		 * Otherwise this is an already existing buffer, and there is
+		 * no race vs first exposure, so fall-through and call
+		 * perf_mmap_close().
+		 */
 	}
 
+	perf_mmap_close(vma);
 	return ret;
 }


@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	struct perf_buffer *rb;
 
 	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
+	free_uid(rb->mmap_user);
 	rb_free(rb);
 }


@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 
 	mutex_init(&rb->aux_mutex);
+	rb->mmap_user = get_current_user();
+	refcount_set(&rb->mmap_count, 1);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)