drm/xe/multi_queue: Reset GT upon CGP_SYNC failure

If GuC doesn't response to CGP_SYNC message, trigger
GT reset and cleanup of all the queues of the multi
queue group.

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251211010249.1647839-31-niranjana.vishwanathapura@intel.com
This commit is contained in:
Niranjana Vishwanathapura 2025-12-10 17:03:00 -08:00
parent c85285b32c
commit bb9343f122

View File

@ -593,6 +593,23 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
xe_sched_tdr_queue_imm(&q->guc->sched);
}
static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
struct xe_exec_queue_group *group = q->multi_queue.group;
struct xe_exec_queue *eq;
xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
xe_exec_queue_is_multi_queue(q));
xe_guc_exec_queue_trigger_cleanup(primary);
mutex_lock(&group->list_lock);
list_for_each_entry(eq, &group->list, multi_queue.link)
xe_guc_exec_queue_trigger_cleanup(eq);
mutex_unlock(&group->list_lock);
}
static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
if (xe_exec_queue_is_multi_queue(q)) {
@ -618,6 +635,23 @@ static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
}
}
static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
struct xe_exec_queue_group *group = q->multi_queue.group;
struct xe_exec_queue *eq;
/* Ban all queues of the multi-queue group */
xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
xe_exec_queue_is_multi_queue(q));
set_exec_queue_banned(primary);
mutex_lock(&group->list_lock);
list_for_each_entry(eq, &group->list, multi_queue.link)
set_exec_queue_banned(eq);
mutex_unlock(&group->list_lock);
}
#define parallel_read(xe_, map_, field_) \
xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
field_)
@ -677,7 +711,11 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
!READ_ONCE(group->sync_pending) ||
xe_guc_read_stopped(guc), HZ);
if (!ret || xe_guc_read_stopped(guc)) {
/* CGP_SYNC failed. Reset gt, cleanup the group */
xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
set_exec_queue_group_banned(q);
xe_gt_reset_async(q->gt);
xe_guc_exec_queue_group_trigger_cleanup(q);
return;
}