diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4e914928e0a9..faca626702b8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -35,6 +35,7 @@ #include "xe_pt.h" #include "xe_pxp.h" #include "xe_res_cursor.h" +#include "xe_sriov_vf.h" #include "xe_svm.h" #include "xe_sync.h" #include "xe_tile.h" @@ -111,12 +112,22 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, static int wait_for_existing_preempt_fences(struct xe_vm *vm) { struct xe_exec_queue *q; + bool vf_migration = IS_SRIOV_VF(vm->xe) && + xe_sriov_vf_migration_supported(vm->xe); + signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT; xe_vm_assert_held(vm); list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { if (q->lr.pfence) { - long timeout = dma_fence_wait(q->lr.pfence, false); + long timeout; + + timeout = dma_fence_wait_timeout(q->lr.pfence, false, + wait_time); + if (!timeout) { + xe_assert(vm->xe, vf_migration); + return -EAGAIN; + } /* Only -ETIME on fence indicates VM needs to be killed */ if (timeout < 0 || q->lr.pfence->error == -ETIME) @@ -541,6 +552,19 @@ static void preempt_rebind_work_func(struct work_struct *w) out_unlock_outer: if (err == -EAGAIN) { trace_xe_vm_rebind_worker_retry(vm); + + /* + * We can't block in workers on a VF which supports migration + * given this can block the VF post-migration workers from + * getting scheduled. + */ + if (IS_SRIOV_VF(vm->xe) && + xe_sriov_vf_migration_supported(vm->xe)) { + up_write(&vm->lock); + xe_vm_queue_rebind_worker(vm); + return; + } + goto retry; }