drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault

This logic is tricky to implement correctly, and we are going to
need it in the devcoredump code as well.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Pierre-Eric Pelloux-Prayer 2026-02-04 16:41:11 +01:00 committed by Alex Deucher
parent d1f188b182
commit 1b135c6da0
2 changed files with 54 additions and 29 deletions

View File

@ -2949,6 +2949,50 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
/**
 * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible.
 * @adev: amdgpu device pointer
 * @root: on return, the referenced and reserved root BO of the VM, or NULL
 *        when no VM is registered for @pasid
 * @pasid: PASID of the VM
 *
 * Looks up the VM registered for @pasid, takes a reference on its root BO,
 * reserves that BO, and then re-checks under the pasids xarray lock that the
 * VM still exists with the same root BO (the VM may be torn down, or the
 * pasid reused, while the lock is dropped for the sleeping reserve).
 *
 * The caller needs to unreserve and unref the root bo on success.
 *
 * Return: the amdgpu_vm on success, NULL when no VM matches @pasid, the
 * reserve fails, or the VM was destroyed while reserving its root BO.
 */
struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
					  struct amdgpu_bo **root, u32 pasid)
{
	unsigned long irqflags;
	struct amdgpu_vm *vm;
	int r;

	/* Grab a reference on the root BO under the xarray lock so the BO
	 * cannot go away once the lock is dropped.
	 */
	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
	vm = xa_load(&adev->vm_manager.pasids, pasid);
	*root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
	if (!*root)
		return NULL;

	/* Reserving may sleep, so it must happen outside the xarray lock. */
	r = amdgpu_bo_reserve(*root, true);
	if (r)
		goto error_unref;

	/* Double check that the VM still exists */
	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
	vm = xa_load(&adev->vm_manager.pasids, pasid);
	if (vm && vm->root.bo != *root)
		/* pasid was reused for a different VM in the meantime */
		vm = NULL;
	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
	if (!vm)
		goto error_unlock;

	return vm;

error_unlock:
	amdgpu_bo_unreserve(*root);
error_unref:
	amdgpu_bo_unref(root);
	return NULL;
}
/**
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
@ -2964,50 +3008,29 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
* shouldn't be reported any more.
*/
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
bool write_fault)
u32 vmid, u32 node_id, uint64_t addr,
uint64_t ts, bool write_fault)
{
bool is_compute_context = false;
struct amdgpu_bo *root;
unsigned long irqflags;
uint64_t value, flags;
struct amdgpu_vm *vm;
int r;
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
vm = xa_load(&adev->vm_manager.pasids, pasid);
if (vm) {
root = amdgpu_bo_ref(vm->root.bo);
is_compute_context = vm->is_compute_context;
} else {
root = NULL;
}
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
if (!root)
vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
if (!vm)
return false;
is_compute_context = vm->is_compute_context;
if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
return true;
}
addr /= AMDGPU_GPU_PAGE_SIZE;
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
/* Double check that the VM still exists */
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
vm = xa_load(&adev->vm_manager.pasids, pasid);
if (vm && vm->root.bo != root)
vm = NULL;
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
if (!vm)
goto error_unlock;
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
@ -3046,7 +3069,6 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
if (r < 0)
dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
error_unref:
amdgpu_bo_unref(&root);
return false;

View File

@ -592,6 +592,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
bool write_fault);
struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
struct amdgpu_bo **root, u32 pasid);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,