mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 10:04:04 +02:00
drm/xe/vf: Start re-emission from first unsignaled job during VF migration
The LRC software ring tail is reset to the first unsignaled pending
job's head.
Fix the re-emission logic to begin submitting from the first unsignaled
job detected, rather than scanning all pending jobs, which can cause
imbalance.
v2:
- Include missing local changes
v3:
- s/skip_replay/restore_replay (Tomasz)
Fixes: c25c1010df ("drm/xe/vf: Replay GuC submission state on pause / unpause")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: https://patch.msgid.link/20251121152750.240557-1-matthew.brost@intel.com
This commit is contained in:
parent
2e02254ef5
commit
00937fe192
|
|
@@ -54,13 +54,14 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
|
||||||
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
|
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
|
||||||
{
|
{
|
||||||
struct drm_sched_job *s_job;
|
struct drm_sched_job *s_job;
|
||||||
|
bool restore_replay = false;
|
||||||
|
|
||||||
list_for_each_entry(s_job, &sched->base.pending_list, list) {
|
list_for_each_entry(s_job, &sched->base.pending_list, list) {
|
||||||
struct drm_sched_fence *s_fence = s_job->s_fence;
|
struct drm_sched_fence *s_fence = s_job->s_fence;
|
||||||
struct dma_fence *hw_fence = s_fence->parent;
|
struct dma_fence *hw_fence = s_fence->parent;
|
||||||
|
|
||||||
if (to_xe_sched_job(s_job)->skip_emit ||
|
restore_replay |= to_xe_sched_job(s_job)->restore_replay;
|
||||||
(hw_fence && !dma_fence_is_signaled(hw_fence)))
|
if (restore_replay || (hw_fence && !dma_fence_is_signaled(hw_fence)))
|
||||||
sched->base.ops->run_job(s_job);
|
sched->base.ops->run_job(s_job);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -822,7 +822,7 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
|
||||||
|
|
||||||
xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
|
xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
|
||||||
|
|
||||||
if (!job->skip_emit || job->last_replay) {
|
if (!job->restore_replay || job->last_replay) {
|
||||||
if (xe_exec_queue_is_parallel(q))
|
if (xe_exec_queue_is_parallel(q))
|
||||||
wq_item_append(q);
|
wq_item_append(q);
|
||||||
else
|
else
|
||||||
|
|
@@ -881,10 +881,10 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
|
||||||
if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
|
if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
|
||||||
if (!exec_queue_registered(q))
|
if (!exec_queue_registered(q))
|
||||||
register_exec_queue(q, GUC_CONTEXT_NORMAL);
|
register_exec_queue(q, GUC_CONTEXT_NORMAL);
|
||||||
if (!job->skip_emit)
|
if (!job->restore_replay)
|
||||||
q->ring_ops->emit_job(job);
|
q->ring_ops->emit_job(job);
|
||||||
submit_exec_queue(q, job);
|
submit_exec_queue(q, job);
|
||||||
job->skip_emit = false;
|
job->restore_replay = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@@ -2147,6 +2147,8 @@ static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
|
||||||
|
|
||||||
job = xe_sched_first_pending_job(sched);
|
job = xe_sched_first_pending_job(sched);
|
||||||
if (job) {
|
if (job) {
|
||||||
|
job->restore_replay = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adjust software tail so jobs submitted overwrite previous
|
* Adjust software tail so jobs submitted overwrite previous
|
||||||
* position in ring buffer with new GGTT addresses.
|
* position in ring buffer with new GGTT addresses.
|
||||||
|
|
@@ -2236,17 +2238,18 @@ static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
|
||||||
struct xe_exec_queue *q)
|
struct xe_exec_queue *q)
|
||||||
{
|
{
|
||||||
struct xe_gpu_scheduler *sched = &q->guc->sched;
|
struct xe_gpu_scheduler *sched = &q->guc->sched;
|
||||||
struct drm_sched_job *s_job;
|
|
||||||
struct xe_sched_job *job = NULL;
|
struct xe_sched_job *job = NULL;
|
||||||
|
bool restore_replay = false;
|
||||||
|
|
||||||
list_for_each_entry(s_job, &sched->base.pending_list, list) {
|
list_for_each_entry(job, &sched->base.pending_list, drm.list) {
|
||||||
job = to_xe_sched_job(s_job);
|
restore_replay |= job->restore_replay;
|
||||||
|
if (restore_replay) {
|
||||||
|
xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
|
||||||
|
q->guc->id, xe_sched_job_seqno(job));
|
||||||
|
|
||||||
xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
|
q->ring_ops->emit_job(job);
|
||||||
q->guc->id, xe_sched_job_seqno(job));
|
job->restore_replay = true;
|
||||||
|
}
|
||||||
q->ring_ops->emit_job(job);
|
|
||||||
job->skip_emit = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (job)
|
if (job)
|
||||||
|
|
|
||||||
|
|
@@ -63,8 +63,8 @@ struct xe_sched_job {
|
||||||
bool ring_ops_flush_tlb;
|
bool ring_ops_flush_tlb;
|
||||||
/** @ggtt: mapped in ggtt. */
|
/** @ggtt: mapped in ggtt. */
|
||||||
bool ggtt;
|
bool ggtt;
|
||||||
/** @skip_emit: skip emitting the job */
|
/** @restore_replay: job being replayed for restore */
|
||||||
bool skip_emit;
|
bool restore_replay;
|
||||||
/** @last_replay: last job being replayed */
|
/** @last_replay: last job being replayed */
|
||||||
bool last_replay;
|
bool last_replay;
|
||||||
/** @ptrs: per instance pointers. */
|
/** @ptrs: per instance pointers. */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user