drm/xe/svm: Serialize migration to device if racing

Introduce an rw-semaphore to serialize migration to device if
it's likely that migration races with another device migration
of the same CPU address space range.
This is a temporary fix that attempts to mitigate a livelock that
might happen if many devices try to migrate a range at the same
time, and it affects only devices using the xe driver.
A longer-term fix will probably require improvements in the core mm
migration layer.

Suggested-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251219113320.183860-25-thomas.hellstrom@linux.intel.com
This commit is contained in:
Thomas Hellström 2025-12-19 12:33:20 +01:00
parent ec265e1f1c
commit 0620837490

View File

@ -1593,10 +1593,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
/**
 * xe_svm_alloc_vram() - Request migration of an SVM range to device memory
 * @range: The SVM range to migrate; its start/end addresses and its
 *         gpusvm's mm are passed to drm_pagemap_populate_mm().
 * @ctx: GPU SVM context; only @ctx->timeslice_ms is read in the visible code.
 * @dpagemap: The drm_pagemap handed to drm_pagemap_populate_mm() as the
 *            migration target.
 *
 * Calls drm_pagemap_populate_mm() under a function-local rw-semaphore.
 * The first attempt runs with the semaphore held for read. If that attempt
 * returns -EBUSY, the semaphore is re-taken for write, the range is evicted
 * and one more attempt is made.
 *
 * NOTE(review): this text is a diff rendering; part of the function body
 * between the two hunks is not visible here.
 *
 * Return: the error from down_read_interruptible() or down_write_killable()
 * if taking the lock fails, otherwise the result of the last
 * drm_pagemap_populate_mm() call.
 */
int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
struct drm_pagemap *dpagemap)
{
/* Function-local static: a single lock instance shared by every call. */
static DECLARE_RWSEM(driver_migrate_lock);
struct xe_vm *vm = range_to_vm(&range->base);
enum drm_gpusvm_scan_result migration_state;
struct xe_device *xe = vm->xe;
/* retries = 1 allows at most two drm_pagemap_populate_mm() attempts. */
int err, retries = 1;
/* Tracks which mode of driver_migrate_lock is held, for the final unlock. */
bool write_locked = false;
xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
range_debug(range, "ALLOCATE VRAM");
/* NOTE(review): diff hunk boundary below; source lines are elided here. */
@ -1615,16 +1617,32 @@ int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *c
drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n",
dpagemap->drm->unique);
/* First attempt runs under the read lock; bail out if the wait is interrupted. */
err = down_read_interruptible(&driver_migrate_lock);
if (err)
return err;
do {
err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
xe_svm_range_end(range),
range->base.gpusvm->mm,
ctx->timeslice_ms);
/*
 * NOTE(review): the next two lines look like the pre-patch lines of the
 * diff (its +/- markers are stripped) that the block below replaces;
 * confirm against the real source file.
 */
if (err == -EBUSY && retries)
drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
if (err == -EBUSY && retries) {
if (!write_locked) {
int lock_err;
/*
 * Drop the read lock before taking the write lock. If the killable
 * wait fails, no lock is held, so returning directly is balanced.
 */
up_read(&driver_migrate_lock);
lock_err = down_write_killable(&driver_migrate_lock);
if (lock_err)
return lock_err;
write_locked = true;
}
/* Evict the range before the retry, now with the write lock held. */
drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
}
/* Loop again only on -EBUSY while a retry remains; retries-- consumes it. */
} while (err == -EBUSY && retries--);
/* Release the lock in whichever mode it is currently held. */
if (write_locked)
up_write(&driver_migrate_lock);
else
up_read(&driver_migrate_lock);
return err;
}