mirror of
https://github.com/torvalds/linux.git
synced 2026-05-24 07:03:03 +02:00
drm/msm: Fix shrinker deadlock
With PROVE_LOCKING on an Snapdragon X1 and VM reclaim pressure, we see:
======================================================
WARNING: possible circular locking dependency detected
7.0.0-debug+ #43 Tainted: G W
------------------------------------------------------
kswapd0/82 is trying to acquire lock:
ffff800080ec3870 (reservation_ww_class_acquire){+.+.}-{0:0}, at: msm_gem_shrinker_scan+0x17c/0x400 [msm]
but task is already holding lock:
ffffc31709b263b8 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x88/0x988
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (fs_reclaim){+.+.}-{0:0}:
__lock_acquire+0x4d0/0xad0
lock_acquire.part.0+0xc4/0x248
lock_acquire+0x8c/0x248
fs_reclaim_acquire+0xd0/0xf0
dma_resv_lockdep+0x224/0x348
do_one_initcall+0x84/0x5d0
do_initcalls+0x194/0x1d8
kernel_init_freeable+0x128/0x180
kernel_init+0x2c/0x160
ret_from_fork+0x10/0x20
-> #1 (reservation_ww_class_mutex){+.+.}-{4:4}:
__lock_acquire+0x4d0/0xad0
lock_acquire.part.0+0xc4/0x248
lock_acquire+0x8c/0x248
dma_resv_lockdep+0x1a8/0x348
do_one_initcall+0x84/0x5d0
do_initcalls+0x194/0x1d8
kernel_init_freeable+0x128/0x180
kernel_init+0x2c/0x160
ret_from_fork+0x10/0x20
-> #0 (reservation_ww_class_acquire){+.+.}-{0:0}:
check_prev_add+0x114/0x790
validate_chain+0x594/0x6f0
__lock_acquire+0x4d0/0xad0
lock_acquire.part.0+0xc4/0x248
lock_acquire+0x8c/0x248
drm_gem_lru_scan+0x1ac/0x440
msm_gem_shrinker_scan+0x17c/0x400 [msm]
do_shrink_slab+0x150/0x4a0
shrink_slab+0x144/0x460
shrink_one+0x9c/0x1b0
shrink_many+0x27c/0x5c0
shrink_node+0x344/0x550
balance_pgdat+0x2c0/0x988
kswapd+0x11c/0x318
kthread+0x10c/0x128
ret_from_fork+0x10/0x20
other info that might help us debug this:
Chain exists of:
reservation_ww_class_acquire --> reservation_ww_class_mutex --> fs_reclaim
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(fs_reclaim);
lock(reservation_ww_class_mutex);
lock(fs_reclaim);
lock(reservation_ww_class_acquire);
*** DEADLOCK ***
1 lock held by kswapd0/82:
#0: ffffc31709b263b8 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x88/0x988
stack backtrace:
CPU: 4 UID: 0 PID: 82 Comm: kswapd0 Tainted: G W 7.0.0-debug+ #43 PREEMPT(full)
Tainted: [W]=WARN
Hardware name: LENOVO 21BX0016US/21BX0016US, BIOS N3HET94W (1.66 ) 09/15/2025
Call trace:
show_stack+0x20/0x40 (C)
dump_stack_lvl+0x9c/0xd0
dump_stack+0x18/0x30
print_circular_bug+0x114/0x120
check_noncircular+0x178/0x198
check_prev_add+0x114/0x790
validate_chain+0x594/0x6f0
__lock_acquire+0x4d0/0xad0
lock_acquire.part.0+0xc4/0x248
lock_acquire+0x8c/0x248
drm_gem_lru_scan+0x1ac/0x440
msm_gem_shrinker_scan+0x17c/0x400 [msm]
do_shrink_slab+0x150/0x4a0
shrink_slab+0x144/0x460
shrink_one+0x9c/0x1b0
shrink_many+0x27c/0x5c0
shrink_node+0x344/0x550
balance_pgdat+0x2c0/0x988
kswapd+0x11c/0x318
kthread+0x10c/0x128
ret_from_fork+0x10/0x20
kswapd0 holding fs_reclaim calls the MSM shrinker, which calls
dma_resv_lock. This in turn acquires fs_reclaim.
Fix this deadlock by using dma_resv_trylock() instead, dropping the
subsequently unused passed wait-wound lock 'ticket'.
Cc: stable@vger.kernel.org
Signed-off-by: Daniel J Blueman <daniel@quora.org>
Fixes: fe4952b5f2 ("drm/msm: Convert vm locking")
Patchwork: https://patchwork.freedesktop.org/patch/723564/
Message-ID: <20260508065722.18785-1-daniel@quora.org>
[rob: fixup compile errors, replace lockdep splat with something legible]
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
This commit is contained in:
parent
3f9ed5f5aa
commit
3392291fc5
|
|
@ -43,8 +43,7 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
|
|||
}
|
||||
|
||||
static bool
|
||||
with_vm_locks(struct ww_acquire_ctx *ticket,
|
||||
void (*fn)(struct drm_gem_object *obj),
|
||||
with_vm_locks(void (*fn)(struct drm_gem_object *obj),
|
||||
struct drm_gem_object *obj)
|
||||
{
|
||||
/*
|
||||
|
|
@ -52,7 +51,7 @@ with_vm_locks(struct ww_acquire_ctx *ticket,
|
|||
* success paths
|
||||
*/
|
||||
struct drm_gpuvm_bo *vm_bo, *last_locked = NULL;
|
||||
int ret = 0;
|
||||
bool locked = true;
|
||||
|
||||
drm_gem_for_each_gpuvm_bo (vm_bo, obj) {
|
||||
struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm);
|
||||
|
|
@ -60,23 +59,14 @@ with_vm_locks(struct ww_acquire_ctx *ticket,
|
|||
if (resv == obj->resv)
|
||||
continue;
|
||||
|
||||
ret = dma_resv_lock(resv, ticket);
|
||||
|
||||
/*
|
||||
* Since we already skip the case when the VM and obj
|
||||
* share a resv (ie. _NO_SHARE objs), we don't expect
|
||||
* to hit a double-locking scenario... which the lock
|
||||
* unwinding cannot really cope with.
|
||||
* dma_resv_lock can't be used due to acquiring 'ticket' before the
|
||||
* fs_reclaim lock, which is held in shrinker context
|
||||
*/
|
||||
WARN_ON(ret == -EALREADY);
|
||||
|
||||
/*
|
||||
* Don't bother with slow-lock / backoff / retry sequence,
|
||||
* if we can't get the lock just give up and move on to
|
||||
* the next object.
|
||||
*/
|
||||
if (ret)
|
||||
if (!dma_resv_trylock(resv)) {
|
||||
locked = false;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold a ref to prevent the vm_bo from being freed
|
||||
|
|
@ -108,11 +98,11 @@ with_vm_locks(struct ww_acquire_ctx *ticket,
|
|||
}
|
||||
}
|
||||
|
||||
return ret == 0;
|
||||
return locked;
|
||||
}
|
||||
|
||||
static bool
|
||||
purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
|
||||
purge(struct drm_gem_object *obj, struct ww_acquire_ctx *)
|
||||
{
|
||||
if (!is_purgeable(to_msm_bo(obj)))
|
||||
return false;
|
||||
|
|
@ -120,11 +110,11 @@ purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
|
|||
if (msm_gem_active(obj))
|
||||
return false;
|
||||
|
||||
return with_vm_locks(ticket, msm_gem_purge, obj);
|
||||
return with_vm_locks(msm_gem_purge, obj);
|
||||
}
|
||||
|
||||
static bool
|
||||
evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
|
||||
evict(struct drm_gem_object *obj, struct ww_acquire_ctx *)
|
||||
{
|
||||
if (is_unevictable(to_msm_bo(obj)))
|
||||
return false;
|
||||
|
|
@ -132,7 +122,7 @@ evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
|
|||
if (msm_gem_active(obj))
|
||||
return false;
|
||||
|
||||
return with_vm_locks(ticket, msm_gem_evict, obj);
|
||||
return with_vm_locks(msm_gem_evict, obj);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -164,7 +154,6 @@ static unsigned long
|
|||
msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
|
||||
{
|
||||
struct msm_drm_private *priv = shrinker->private_data;
|
||||
struct ww_acquire_ctx ticket;
|
||||
struct {
|
||||
struct drm_gem_lru *lru;
|
||||
bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket);
|
||||
|
|
@ -185,11 +174,14 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
|
|||
for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) {
|
||||
if (!stages[i].cond)
|
||||
continue;
|
||||
/*
|
||||
* 'ticket' not needed on trylock paths
|
||||
*/
|
||||
stages[i].freed =
|
||||
drm_gem_lru_scan(stages[i].lru, nr,
|
||||
&stages[i].remaining,
|
||||
stages[i].shrink,
|
||||
&ticket);
|
||||
NULL);
|
||||
nr -= stages[i].freed;
|
||||
freed += stages[i].freed;
|
||||
remaining += stages[i].remaining;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user