mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 23:52:08 +02:00
drm/xe: Process deferred GGTT node removals on device unwind
While we are indirectly draining our dedicated workqueue ggtt->wq that we use to complete asynchronous removal of some GGTT nodes, this happends as part of the managed-drm unwinding (ggtt_fini_early), which could be later then manage-device unwinding, where we could already unmap our MMIO/GMS mapping (mmio_fini). This was recently observed during unsuccessful VF initialization: [ ] xe 0000:00:02.1: probe with driver xe failed with error -62 [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747340 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747540 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747240 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747040 tiles_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e746840 mmio_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747f40 xe_bo_pinned_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e746b40 devm_drm_dev_init_release (16 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] drmres release begin [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef81640 __fini_relay (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80d40 guc_ct_fini (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80040 __drmm_mutex_release (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80140 ggtt_fini_early (8 bytes) and this was leading to: [ ] BUG: unable to handle page fault for address: ffffc900058162a0 [ ] #PF: supervisor write access in kernel mode [ ] #PF: error_code(0x0002) - not-present page [ ] Oops: Oops: 0002 [#1] SMP NOPTI [ ] Tainted: [W]=WARN [ ] Workqueue: xe-ggtt-wq ggtt_node_remove_work_func [xe] [ ] RIP: 0010:xe_ggtt_set_pte+0x6d/0x350 [xe] [ ] Call Trace: [ ] <TASK> [ ] xe_ggtt_clear+0xb0/0x270 [xe] [ ] ggtt_node_remove+0xbb/0x120 [xe] [ ] ggtt_node_remove_work_func+0x30/0x50 [xe] [ ] process_one_work+0x22b/0x6f0 [ ] worker_thread+0x1e8/0x3d Add managed-device action that will explicitly drain the workqueue with all pending node removals prior to releasing MMIO/GSM mapping. Fixes:919bb54e98("drm/xe: Fix missing runtime outer protection for ggtt_remove_node") Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Lucas De Marchi <lucas.demarchi@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://lore.kernel.org/r/20250612220937.857-2-michal.wajdeczko@intel.com (cherry picked from commit89d2835c36) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
This commit is contained in:
parent
ad40098da5
commit
af2b588abe
|
|
@ -201,6 +201,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
|
|||
.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
|
||||
};
|
||||
|
||||
static void dev_fini_ggtt(void *arg)
|
||||
{
|
||||
struct xe_ggtt *ggtt = arg;
|
||||
|
||||
drain_workqueue(ggtt->wq);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_ggtt_init_early - Early GGTT initialization
|
||||
* @ggtt: the &xe_ggtt to be initialized
|
||||
|
|
@ -257,6 +264,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (IS_SRIOV_VF(xe)) {
|
||||
err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0));
|
||||
if (err)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user