From bf213ac63721b8fbbf0fc07eea6366419826ae8b Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 20 Nov 2025 18:14:35 +0200 Subject: [PATCH 1/4] drm/xe: Fix memory leak when handling pagefault vma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the pagefault handling code was moved to a new file, an extra drm_exec_init() was added to the VMA path. This call is unnecessary because xe_validation_ctx_init() already performs a drm_exec_init(), resulting in a memory leak reported by kmemleak. Remove the redundant drm_exec_init() from the VMA pagefault handling code. Fixes: fb544b844508 ("drm/xe: Implement xe_pagefault_queue_work") Cc: Matthew Brost Cc: Stuart Summers Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: Sumit Semwal Cc: "Christian König" Cc: intel-xe@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Mika Kuoppala Reviewed-by: Thomas Hellström Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20251120161435.3674556-1-mika.kuoppala@linux.intel.com (cherry picked from commit 62519b77aecad22b525eda482660ffa127e7ad80) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_pagefault.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index fe3e40145012..afb06598b6e1 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -102,7 +102,6 @@ static int xe_pagefault_handle_vma(struct xe_gt *gt, struct xe_vma *vma, /* Lock VM and BOs dma-resv */ xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); - drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { err = xe_pagefault_begin(&exec, vma, tile->mem.vram, needs_vram == 1); From 14a8d83cbe7b929ee601fd2a48b4642cf80b39f4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 15 Nov 2025 
16:13:22 +0100 Subject: [PATCH 2/4] drm/xe/pf: Use div_u64 when calculating GGTT profile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will fix the following error seen on some 32-bit config: "ERROR: modpost: "__udivdi3" [drivers/gpu/drm/xe/xe.ko] undefined!" Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202511150929.3vUi6PEJ-lkp@intel.com/ Fixes: e448372e8a8e ("drm/xe/pf: Use migration-friendly GGTT auto-provisioning") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20251115151323.10828-1-michal.wajdeczko@intel.com (cherry picked from commit 0f4435a1f46efc3177eb082cd3f73e29da5ab86a) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 62f6cc45a764..59c5c6b4d994 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -711,7 +711,7 @@ static u64 pf_profile_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) if (num_vfs > 56) return SZ_64M - SZ_8M; - return rounddown_pow_of_two(shareable / num_vfs); + return rounddown_pow_of_two(div_u64(shareable, num_vfs)); } /** From 3d98a7164da666634c76647abc94218fff3d4f92 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 21 Nov 2025 07:27:50 -0800 Subject: [PATCH 3/4] drm/xe/vf: Start re-emission from first unsignaled job during VF migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LRC software ring tail is reset to the first unsignaled pending job's head. Fix the re-emission logic to begin submitting from the first unsignaled job detected, rather than scanning all pending jobs, which can cause imbalance. 
v2: - Include missing local changes v3: - s/skip_replay/restore_replay (Tomasz) Fixes: c25c1010df88 ("drm/xe/vf: Replay GuC submission state on pause / unpause") Signed-off-by: Matthew Brost Reviewed-by: Tomasz Lis Link: https://patch.msgid.link/20251121152750.240557-1-matthew.brost@intel.com (cherry picked from commit 00937fe1921ab346b6f6a4beaa5c38e14733caa3) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 5 +++-- drivers/gpu/drm/xe/xe_guc_submit.c | 25 ++++++++++++++----------- drivers/gpu/drm/xe/xe_sched_job_types.h | 4 ++-- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 9955397aaaa9..c7a77a3a9681 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -54,13 +54,14 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched) static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched) { struct drm_sched_job *s_job; + bool restore_replay = false; list_for_each_entry(s_job, &sched->base.pending_list, list) { struct drm_sched_fence *s_fence = s_job->s_fence; struct dma_fence *hw_fence = s_fence->parent; - if (to_xe_sched_job(s_job)->skip_emit || - (hw_fence && !dma_fence_is_signaled(hw_fence))) + restore_replay |= to_xe_sched_job(s_job)->restore_replay; + if (restore_replay || (hw_fence && !dma_fence_is_signaled(hw_fence))) sched->base.ops->run_job(s_job); } } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d4ffdb71ef3d..c56fd44641f6 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -822,7 +822,7 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job) xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - if (!job->skip_emit || job->last_replay) { + if (!job->restore_replay || job->last_replay) { if (xe_exec_queue_is_parallel(q)) wq_item_append(q); else 
@@ -881,10 +881,10 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) register_exec_queue(q, GUC_CONTEXT_NORMAL); - if (!job->skip_emit) + if (!job->restore_replay) q->ring_ops->emit_job(job); submit_exec_queue(q, job); - job->skip_emit = false; + job->restore_replay = false; } /* @@ -2152,6 +2152,8 @@ static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) job = xe_sched_first_pending_job(sched); if (job) { + job->restore_replay = true; + /* * Adjust software tail so jobs submitted overwrite previous * position in ring buffer with new GGTT addresses. @@ -2241,17 +2243,18 @@ static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; - struct drm_sched_job *s_job; struct xe_sched_job *job = NULL; + bool restore_replay = false; - list_for_each_entry(s_job, &sched->base.pending_list, list) { - job = to_xe_sched_job(s_job); + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + restore_replay |= job->restore_replay; + if (restore_replay) { + xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", + q->guc->id, xe_sched_job_seqno(job)); - xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d", - q->guc->id, xe_sched_job_seqno(job)); - - q->ring_ops->emit_job(job); - job->skip_emit = true; + q->ring_ops->emit_job(job); + job->restore_replay = true; + } } if (job) diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index d26612abb4ca..7c4c54fe920a 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -63,8 +63,8 @@ struct xe_sched_job { bool ring_ops_flush_tlb; /** @ggtt: mapped in ggtt. 
*/ bool ggtt; - /** @skip_emit: skip emitting the job */ - bool skip_emit; + /** @restore_replay: job being replayed for restore */ + bool restore_replay; /** @last_replay: last job being replayed */ bool last_replay; /** @ptrs: per instance pointers. */ From d72312d730450aab225a80bc84436757b85b08b5 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Mon, 24 Nov 2025 23:28:53 +0100 Subject: [PATCH 4/4] drm/xe: Protect against unset LRC when pausing submissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While pausing submissions, it is possible to encounter an exec queue which is still being created, and therefore doesn't have a valid xe_lrc struct reference. Protect against such a situation by checking for NULL before access. Reviewed-by: Matthew Brost Fixes: c25c1010df88 ("drm/xe/vf: Replay GuC submission state on pause / unpause") Signed-off-by: Tomasz Lis Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20251124222853.1900800-1-tomasz.lis@intel.com (cherry picked from commit 07cf4b864f523f01d2bb522a05813df30b076ba8) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_submit.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index c56fd44641f6..ed7be50b2f72 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2112,6 +2112,18 @@ static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc, q->guc->resume_time = 0; } +static void lrc_parallel_clear(struct xe_lrc *lrc) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + struct iosys_map map = xe_lrc_parallel_map(lrc); + int i; + + for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) + parallel_write(xe, map, wq[i], + FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | + FIELD_PREP(WQ_LEN_MASK, 0)); +} + /* * This function is quite complex but only real way to ensure no state is lost * during VF resume flows.
The function scans the queue state, make adjustments @@ -2135,8 +2147,8 @@ static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) guc_exec_queue_revert_pending_state_change(guc, q); if (xe_exec_queue_is_parallel(q)) { - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); + /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ + struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); /* * NOP existing WQ commands that may contain stale GGTT @@ -2144,10 +2156,8 @@ static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) * seems to get confused if the WQ head/tail pointers are * adjusted. */ - for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) - parallel_write(xe, map, wq[i], - FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | - FIELD_PREP(WQ_LEN_MASK, 0)); + if (lrc) + lrc_parallel_clear(lrc); } job = xe_sched_first_pending_job(sched);