mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 18:43:33 +02:00
drm/xe/vf: Avoid indefinite blocking in preempt rebind worker for VFs supporting migration
Blocking in work queues on a hardware action that may never occur — especially when it depends on a software fixup also scheduled on a work queue — is a recipe for deadlock. This situation arises with the preempt rebind worker and VF post-migration recovery. To prevent potential deadlocks, avoid indefinite blocking in the preempt rebind worker for VFs that support migration. v4: - Use dma_fence_wait_timeout (CI) Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Tomasz Lis <tomasz.lis@intel.com> Link: https://lore.kernel.org/r/20251008214532.3442967-19-matthew.brost@intel.com
This commit is contained in:
parent
a4dae94aad
commit
1faeeea056
|
|
@ -35,6 +35,7 @@
|
|||
#include "xe_pt.h"
|
||||
#include "xe_pxp.h"
|
||||
#include "xe_res_cursor.h"
|
||||
#include "xe_sriov_vf.h"
|
||||
#include "xe_svm.h"
|
||||
#include "xe_sync.h"
|
||||
#include "xe_tile.h"
|
||||
|
|
@ -111,12 +112,22 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
|
|||
static int wait_for_existing_preempt_fences(struct xe_vm *vm)
|
||||
{
|
||||
struct xe_exec_queue *q;
|
||||
bool vf_migration = IS_SRIOV_VF(vm->xe) &&
|
||||
xe_sriov_vf_migration_supported(vm->xe);
|
||||
signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
|
||||
|
||||
xe_vm_assert_held(vm);
|
||||
|
||||
list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
|
||||
if (q->lr.pfence) {
|
||||
long timeout = dma_fence_wait(q->lr.pfence, false);
|
||||
long timeout;
|
||||
|
||||
timeout = dma_fence_wait_timeout(q->lr.pfence, false,
|
||||
wait_time);
|
||||
if (!timeout) {
|
||||
xe_assert(vm->xe, vf_migration);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/* Only -ETIME on fence indicates VM needs to be killed */
|
||||
if (timeout < 0 || q->lr.pfence->error == -ETIME)
|
||||
|
|
@ -541,6 +552,19 @@ static void preempt_rebind_work_func(struct work_struct *w)
|
|||
out_unlock_outer:
|
||||
if (err == -EAGAIN) {
|
||||
trace_xe_vm_rebind_worker_retry(vm);
|
||||
|
||||
/*
|
||||
* We can't block in workers on a VF which supports migration
|
||||
* given this can block the VF post-migration workers from
|
||||
* getting scheduled.
|
||||
*/
|
||||
if (IS_SRIOV_VF(vm->xe) &&
|
||||
xe_sriov_vf_migration_supported(vm->xe)) {
|
||||
up_write(&vm->lock);
|
||||
xe_vm_queue_rebind_worker(vm);
|
||||
return;
|
||||
}
|
||||
|
||||
goto retry;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user