drm/gem: Make the GEM LRU lock part of drm_device

Recently, a few races have been discovered in the GEM LRU logic, all
of them caused by the fact the LRU lock is accessed through
gem->lru->lock, and that very same lock also protects changes to
gem->lru, leading to situations where gem->lru needs to first be
accessed without the lock held, to then get the lru to access the lock
through and finally take the lock and do the expected operation.

Currently, the only driver making use of this API (MSM) declares a
device-wide lock, and the user we're about to add (panthor) will
do the same. There's no evidence that we will ever have a driver
that wants different pools of LRUs protected by different locks under
the same drm_device. So we're better off moving this lock to drm_device
and always locking it through obj->dev->gem_lru_mutex, or directly
through dev->gem_lru_mutex.

If anyone ever needs more fine-grained locking, this can be revisited
to pass some drm_gem_lru_pool object representing the pool of LRUs
under a specific lock, but for now, the per-device lock seems to be
enough.

Fixes: e7c2af13f8 ("drm/gem: Add LRU/shrinker helper")
Reported-by: Chia-I Wu <olvaffe@gmail.com>
Closes: https://gitlab.freedesktop.org/panfrost/linux/-/work_items/86
Reviewed-by: Rob Clark <rob.clark@oss.qualcomm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Link: https://patch.msgid.link/20260518-panthor-shrinker-fixes-v4-1-1920234470d5@collabora.com
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
This commit is contained in:
Boris Brezillon 2026-05-18 13:41:45 +02:00
parent e02b5262fd
commit 379e8f1ca5
11 changed files with 69 additions and 75 deletions

View File

@ -697,6 +697,7 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
mutex_destroy(&dev->master_mutex);
mutex_destroy(&dev->clientlist_mutex);
mutex_destroy(&dev->filelist_mutex);
mutex_destroy(&dev->gem_lru_mutex);
}
static int drm_dev_init(struct drm_device *dev,
@ -738,6 +739,7 @@ static int drm_dev_init(struct drm_device *dev,
INIT_LIST_HEAD(&dev->vblank_event_list);
spin_lock_init(&dev->event_lock);
mutex_init(&dev->gem_lru_mutex);
mutex_init(&dev->filelist_mutex);
mutex_init(&dev->clientlist_mutex);
mutex_init(&dev->master_mutex);

View File

@ -1541,12 +1541,10 @@ EXPORT_SYMBOL(drm_gem_unlock_reservations);
* drm_gem_lru_init - initialize a LRU
*
* @lru: The LRU to initialize
* @lock: The lock protecting the LRU
*/
void
drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock)
drm_gem_lru_init(struct drm_gem_lru *lru)
{
lru->lock = lock;
lru->count = 0;
INIT_LIST_HEAD(&lru->list);
}
@ -1571,14 +1569,10 @@ drm_gem_lru_remove_locked(struct drm_gem_object *obj)
void
drm_gem_lru_remove(struct drm_gem_object *obj)
{
struct drm_gem_lru *lru = obj->lru;
if (!lru)
return;
mutex_lock(lru->lock);
drm_gem_lru_remove_locked(obj);
mutex_unlock(lru->lock);
mutex_lock(&obj->dev->gem_lru_mutex);
if (obj->lru)
drm_gem_lru_remove_locked(obj);
mutex_unlock(&obj->dev->gem_lru_mutex);
}
EXPORT_SYMBOL(drm_gem_lru_remove);
@ -1593,7 +1587,7 @@ EXPORT_SYMBOL(drm_gem_lru_remove);
void
drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj)
{
lockdep_assert_held_once(lru->lock);
lockdep_assert_held_once(&obj->dev->gem_lru_mutex);
if (obj->lru)
drm_gem_lru_remove_locked(obj);
@ -1617,9 +1611,9 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail_locked);
void
drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj)
{
mutex_lock(lru->lock);
mutex_lock(&obj->dev->gem_lru_mutex);
drm_gem_lru_move_tail_locked(lru, obj);
mutex_unlock(lru->lock);
mutex_unlock(&obj->dev->gem_lru_mutex);
}
EXPORT_SYMBOL(drm_gem_lru_move_tail);
@ -1633,6 +1627,7 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
* of the shrink callback to check for this (ie. dma_resv_test_signaled())
* or if necessary block until the buffer becomes idle.
*
* @dev: DRM device the LRU belongs to
* @lru: The LRU to scan
* @nr_to_scan: The number of pages to try to reclaim
* @remaining: The number of pages left to reclaim, should be initialized by caller
@ -1640,7 +1635,8 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
* @ticket: Optional ww_acquire_ctx context to use for locking
*/
unsigned long
drm_gem_lru_scan(struct drm_gem_lru *lru,
drm_gem_lru_scan(struct drm_device *dev,
struct drm_gem_lru *lru,
unsigned int nr_to_scan,
unsigned long *remaining,
bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket),
@ -1650,9 +1646,9 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
struct drm_gem_object *obj;
unsigned freed = 0;
drm_gem_lru_init(&still_in_lru, lru->lock);
drm_gem_lru_init(&still_in_lru);
mutex_lock(lru->lock);
mutex_lock(&dev->gem_lru_mutex);
while (freed < nr_to_scan) {
obj = list_first_entry_or_null(&lru->list, typeof(*obj), lru_node);
@ -1675,7 +1671,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
* rest of the loop body, to reduce contention with other
* code paths that need the LRU lock
*/
mutex_unlock(lru->lock);
mutex_unlock(&dev->gem_lru_mutex);
if (ticket)
ww_acquire_init(ticket, &reservation_ww_class);
@ -1709,7 +1705,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
tail:
drm_gem_object_put(obj);
mutex_lock(lru->lock);
mutex_lock(&dev->gem_lru_mutex);
}
/*
@ -1721,7 +1717,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
list_splice_tail(&still_in_lru.list, &lru->list);
lru->count += still_in_lru.count;
mutex_unlock(lru->lock);
mutex_unlock(&dev->gem_lru_mutex);
return freed;
}

View File

@ -128,11 +128,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv,
/*
* Initialize the LRUs:
*/
mutex_init(&priv->lru.lock);
drm_gem_lru_init(&priv->lru.unbacked, &priv->lru.lock);
drm_gem_lru_init(&priv->lru.pinned, &priv->lru.lock);
drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock);
drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock);
drm_gem_lru_init(&priv->lru.unbacked);
drm_gem_lru_init(&priv->lru.pinned);
drm_gem_lru_init(&priv->lru.willneed);
drm_gem_lru_init(&priv->lru.dontneed);
/* Initialize stall-on-fault */
spin_lock_init(&priv->fault_stall_lock);
@ -140,7 +139,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv,
/* Teach lockdep about lock ordering wrt. shrinker: */
fs_reclaim_acquire(GFP_KERNEL);
might_lock(&priv->lru.lock);
might_lock(&ddev->gem_lru_mutex);
fs_reclaim_release(GFP_KERNEL);
if (priv->kms_init) {

View File

@ -150,13 +150,6 @@ struct msm_drm_private {
* DONTNEED state (ie. can be purged)
*/
struct drm_gem_lru dontneed;
/**
* lock:
*
* Protects manipulation of all of the LRUs.
*/
struct mutex lock;
} lru;
struct notifier_block vmap_notifier;

View File

@ -177,11 +177,11 @@ static void update_lru_locked(struct drm_gem_object *obj)
static void update_lru(struct drm_gem_object *obj)
{
struct msm_drm_private *priv = obj->dev->dev_private;
struct drm_device *dev = obj->dev;
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
update_lru_locked(obj);
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
}
static struct page **get_pages(struct drm_gem_object *obj)
@ -292,11 +292,11 @@ void msm_gem_pin_obj_locked(struct drm_gem_object *obj)
static void pin_obj_locked(struct drm_gem_object *obj)
{
struct msm_drm_private *priv = obj->dev->dev_private;
struct drm_device *dev = obj->dev;
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
msm_gem_pin_obj_locked(obj);
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
}
struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj)
@ -487,16 +487,16 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct drm_gpuva *vma)
void msm_gem_unpin_locked(struct drm_gem_object *obj)
{
struct msm_drm_private *priv = obj->dev->dev_private;
struct drm_device *dev = obj->dev;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
msm_gem_assert_locked(obj);
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
msm_obj->pin_count--;
GEM_WARN_ON(msm_obj->pin_count < 0);
update_lru_locked(obj);
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
}
/* Special unpin path for use in fence-signaling path, avoiding the need
@ -507,10 +507,10 @@ void msm_gem_unpin_locked(struct drm_gem_object *obj)
*/
void msm_gem_unpin_active(struct drm_gem_object *obj)
{
struct msm_drm_private *priv = obj->dev->dev_private;
struct drm_device *dev = obj->dev;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
GEM_WARN_ON(!mutex_is_locked(&priv->lru.lock));
GEM_WARN_ON(!mutex_is_locked(&dev->gem_lru_mutex));
msm_obj->pin_count--;
GEM_WARN_ON(msm_obj->pin_count < 0);
@ -797,12 +797,12 @@ void msm_gem_put_vaddr(struct drm_gem_object *obj)
*/
int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
{
struct msm_drm_private *priv = obj->dev->dev_private;
struct drm_device *dev = obj->dev;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
msm_gem_lock(obj);
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
if (msm_obj->madv != __MSM_MADV_PURGED)
msm_obj->madv = madv;
@ -814,7 +814,7 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
*/
update_lru_locked(obj);
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
msm_gem_unlock(obj);
@ -824,7 +824,6 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
void msm_gem_purge(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
struct msm_drm_private *priv = obj->dev->dev_private;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
msm_gem_assert_locked(obj);
@ -839,10 +838,10 @@ void msm_gem_purge(struct drm_gem_object *obj)
put_pages(obj);
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
/* A one-way transition: */
msm_obj->madv = __MSM_MADV_PURGED;
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
drm_gem_free_mmap_offset(obj);

View File

@ -186,7 +186,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
if (!stages[i].cond)
continue;
stages[i].freed =
drm_gem_lru_scan(stages[i].lru, nr,
drm_gem_lru_scan(priv->dev, stages[i].lru, nr,
&stages[i].remaining,
stages[i].shrink,
&ticket);
@ -255,7 +255,7 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
unsigned long remaining = 0;
for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) {
unmapped += drm_gem_lru_scan(lrus[idx],
unmapped += drm_gem_lru_scan(priv->dev, lrus[idx],
vmap_shrink_limit - unmapped,
&remaining,
vmap_shrink,

View File

@ -352,7 +352,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit)
static int submit_pin_objects(struct msm_gem_submit *submit)
{
struct msm_drm_private *priv = submit->dev->dev_private;
struct drm_device *dev = submit->dev;
int i, ret = 0;
for (i = 0; i < submit->nr_bos; i++) {
@ -381,11 +381,11 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
* get_pages() which could trigger reclaim.. and if we held the LRU lock
* could trigger deadlock with the shrinker).
*/
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
for (i = 0; i < submit->nr_bos; i++) {
msm_gem_pin_obj_locked(submit->bos[i].obj);
}
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
submit->bos_pinned = true;

View File

@ -702,7 +702,7 @@ static struct dma_fence *
msm_vma_job_run(struct drm_sched_job *_job)
{
struct msm_vm_bind_job *job = to_msm_vm_bind_job(_job);
struct msm_drm_private *priv = job->vm->drm->dev_private;
struct drm_device *dev = job->vm->drm;
struct msm_gem_vm *vm = to_msm_vm(job->vm);
struct drm_gem_object *obj;
int ret = vm->unusable ? -EINVAL : 0;
@ -745,13 +745,13 @@ msm_vma_job_run(struct drm_sched_job *_job)
if (ret)
msm_gem_vm_unusable(job->vm);
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
job_foreach_bo (obj, job) {
msm_gem_unpin_active(obj);
}
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
/* VM_BIND ops are synchronous, so no fence to wait on: */
return NULL;
@ -1305,7 +1305,7 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job)
return PTR_ERR(pages);
}
struct msm_drm_private *priv = job->vm->drm->dev_private;
struct drm_device *dev = job->vm->drm;
/*
* A second loop while holding the LRU lock (a) avoids acquiring/dropping
@ -1314,10 +1314,10 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job)
* get_pages() which could trigger reclaim.. and if we held the LRU lock
* could trigger deadlock with the shrinker).
*/
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
job_foreach_bo (obj, job)
msm_gem_pin_obj_locked(obj);
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
job->bos_pinned = true;

View File

@ -16,13 +16,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
struct msm_gem_submit *submit = to_msm_submit(job);
struct msm_fence_context *fctx = submit->ring->fctx;
struct msm_gpu *gpu = submit->gpu;
struct msm_drm_private *priv = gpu->dev->dev_private;
struct drm_device *dev = gpu->dev;
unsigned nr_cmds = submit->nr_cmds;
int i;
msm_fence_init(submit->hw_fence, fctx);
mutex_lock(&priv->lru.lock);
mutex_lock(&dev->gem_lru_mutex);
for (i = 0; i < submit->nr_bos; i++) {
struct drm_gem_object *obj = submit->bos[i].obj;
@ -32,7 +32,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
submit->bos_pinned = false;
mutex_unlock(&priv->lru.lock);
mutex_unlock(&dev->gem_lru_mutex);
/* TODO move submit path over to using a per-ring lock.. */
mutex_lock(&gpu->lock);

View File

@ -375,6 +375,13 @@ struct drm_device {
* Root directory for debugfs files.
*/
struct dentry *debugfs_root;
/**
* @gem_lru_mutex:
*
* Lock protecting movement of GEM objects between LRUs.
*/
struct mutex gem_lru_mutex;
};
void drm_dev_set_dma_dev(struct drm_device *dev, struct device *dma_dev);

View File

@ -245,17 +245,11 @@ struct drm_gem_object_funcs {
* for lockless &shrinker.count_objects, and provides
* &drm_gem_lru_scan for driver's &shrinker.scan_objects
* implementation.
*
* Any access to this kind of object must be done with
* drm_device::gem_lru_mutex held.
*/
struct drm_gem_lru {
/**
* @lock:
*
* Lock protecting movement of GEM objects between LRUs. All
* LRUs that the object can move between should be protected
* by the same lock.
*/
struct mutex *lock;
/**
* @count:
*
@ -453,6 +447,9 @@ struct drm_gem_object {
* @lru:
*
* The current LRU list that the GEM object is on.
*
* Access to this field must be done with drm_device::gem_lru_mutex
* held.
*/
struct drm_gem_lru *lru;
};
@ -610,12 +607,13 @@ void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count,
int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
u32 handle, u64 *offset);
void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock);
void drm_gem_lru_init(struct drm_gem_lru *lru);
void drm_gem_lru_remove(struct drm_gem_object *obj);
void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj);
void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj);
unsigned long
drm_gem_lru_scan(struct drm_gem_lru *lru,
drm_gem_lru_scan(struct drm_device *dev,
struct drm_gem_lru *lru,
unsigned int nr_to_scan,
unsigned long *remaining,
bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket),