mirror of
https://github.com/torvalds/linux.git
synced 2026-06-02 19:43:40 +02:00
drm/xe/vf: Fix VM crash during VF driver release
The VF CCS save/restore series (patchwork #149108) has a dependency on the migration framework. A recent migration update in commit d65ff1ec85 ("drm/xe: Split xe_migrate allocation from initialization") caused a VM crash during XE driver release for iGPU devices.

Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b83: 0000 [#1] SMP NOPTI
RIP: 0010:xe_lrc_ring_head+0x12/0xb0 [xe]
Call Trace:
 xe_sriov_vf_ccs_fini+0x1e/0x40 [xe]
 devm_action_release+0x12/0x30
 release_nodes+0x3a/0x120
 devres_release_all+0x96/0xd0
 device_unbind_cleanup+0x12/0x80
 device_release_driver_internal+0x23a/0x280
 device_release_driver+0x12/0x20
 pci_stop_bus_device+0x69/0x90
 pci_stop_and_remove_bus_device+0x12/0x30
 pci_iov_remove_virtfn+0xbd/0x130
 sriov_disable+0x42/0x100
 pci_disable_sriov+0x34/0x50
 xe_pci_sriov_configure+0xf71/0x1020 [xe]

Update the VF CCS migration initialization sequence to align with the new migration framework changes, resolving the release-time crash.

Fixes: f3009272ff ("drm/xe/vf: Create contexts for CCS read write")
Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Piotr Piórkowski <piotr.piorkowski@intel.com>
Reviewed-by: Piotr Piórkowski <piotr.piorkowski@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250729120720.13990-1-satyanarayana.k.v.p@intel.com
This commit is contained in:
parent
d6a0311c37
commit
a843b98947
|
|
@ -564,12 +564,10 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
|
||||||
if (xe_gt_is_main_type(gt)) {
|
if (xe_gt_is_main_type(gt)) {
|
||||||
struct xe_tile *tile = gt_to_tile(gt);
|
struct xe_tile *tile = gt_to_tile(gt);
|
||||||
|
|
||||||
tile->migrate = xe_migrate_init(tile);
|
err = xe_migrate_init(tile->migrate);
|
||||||
if (IS_ERR(tile->migrate)) {
|
if (err)
|
||||||
err = PTR_ERR(tile->migrate);
|
|
||||||
goto err_force_wake;
|
goto err_force_wake;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
err = xe_uc_load_hw(&gt->uc);
|
err = xe_uc_load_hw(&gt->uc);
|
||||||
if (err)
|
if (err)
|
||||||
|
|
|
||||||
|
|
@ -396,15 +396,15 @@ struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xe_migrate_init() - Initialize a migrate context
|
* xe_migrate_init() - Initialize a migrate context
|
||||||
* @tile: Back-pointer to the tile we're initializing for.
|
* @m: The migration context
|
||||||
*
|
*
|
||||||
* Return: Pointer to a migrate context on success. Error pointer on error.
|
* Return: 0 if successful, negative error code on failure
|
||||||
*/
|
*/
|
||||||
struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
|
int xe_migrate_init(struct xe_migrate *m)
|
||||||
{
|
{
|
||||||
struct xe_device *xe = tile_to_xe(tile);
|
struct xe_tile *tile = m->tile;
|
||||||
struct xe_gt *primary_gt = tile->primary_gt;
|
struct xe_gt *primary_gt = tile->primary_gt;
|
||||||
struct xe_migrate *m = tile->migrate;
|
struct xe_device *xe = tile_to_xe(tile);
|
||||||
struct xe_vm *vm;
|
struct xe_vm *vm;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
|
@ -412,15 +412,13 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
|
||||||
vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
|
vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
|
||||||
XE_VM_FLAG_SET_TILE_ID(tile));
|
XE_VM_FLAG_SET_TILE_ID(tile));
|
||||||
if (IS_ERR(vm))
|
if (IS_ERR(vm))
|
||||||
return ERR_CAST(vm);
|
return PTR_ERR(vm);
|
||||||
|
|
||||||
xe_vm_lock(vm, false);
|
xe_vm_lock(vm, false);
|
||||||
err = xe_migrate_prepare_vm(tile, m, vm);
|
err = xe_migrate_prepare_vm(tile, m, vm);
|
||||||
xe_vm_unlock(vm);
|
xe_vm_unlock(vm);
|
||||||
if (err) {
|
if (err)
|
||||||
xe_vm_close_and_put(vm);
|
goto err_out;
|
||||||
return ERR_PTR(err);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (xe->info.has_usm) {
|
if (xe->info.has_usm) {
|
||||||
struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
|
struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
|
||||||
|
|
@ -429,8 +427,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
|
||||||
false);
|
false);
|
||||||
u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
|
u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
|
||||||
|
|
||||||
if (!hwe || !logical_mask)
|
if (!hwe || !logical_mask) {
|
||||||
return ERR_PTR(-EINVAL);
|
err = -EINVAL;
|
||||||
|
goto err_out;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX: Currently only reserving 1 (likely slow) BCS instance on
|
* XXX: Currently only reserving 1 (likely slow) BCS instance on
|
||||||
|
|
@ -449,8 +449,8 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
|
||||||
EXEC_QUEUE_FLAG_MIGRATE, 0);
|
EXEC_QUEUE_FLAG_MIGRATE, 0);
|
||||||
}
|
}
|
||||||
if (IS_ERR(m->q)) {
|
if (IS_ERR(m->q)) {
|
||||||
xe_vm_close_and_put(vm);
|
err = PTR_ERR(m->q);
|
||||||
return ERR_CAST(m->q);
|
goto err_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_init(&m->job_mutex);
|
mutex_init(&m->job_mutex);
|
||||||
|
|
@ -460,7 +460,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
|
||||||
|
|
||||||
err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m);
|
err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m);
|
||||||
if (err)
|
if (err)
|
||||||
return ERR_PTR(err);
|
return err;
|
||||||
|
|
||||||
if (IS_DGFX(xe)) {
|
if (IS_DGFX(xe)) {
|
||||||
if (xe_migrate_needs_ccs_emit(xe))
|
if (xe_migrate_needs_ccs_emit(xe))
|
||||||
|
|
@ -475,7 +475,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
|
||||||
(unsigned long long)m->min_chunk_size);
|
(unsigned long long)m->min_chunk_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
return m;
|
return err;
|
||||||
|
|
||||||
|
err_out:
|
||||||
|
xe_vm_close_and_put(vm);
|
||||||
|
return err;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 max_mem_transfer_per_pass(struct xe_device *xe)
|
static u64 max_mem_transfer_per_pass(struct xe_device *xe)
|
||||||
|
|
|
||||||
|
|
@ -105,7 +105,7 @@ struct xe_migrate_pt_update {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile);
|
struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile);
|
||||||
struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
|
int xe_migrate_init(struct xe_migrate *m);
|
||||||
|
|
||||||
struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
|
struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
|
||||||
unsigned long npages,
|
unsigned long npages,
|
||||||
|
|
|
||||||
|
|
@ -270,11 +270,16 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
|
||||||
ctx = &tile->sriov.vf.ccs[ctx_id];
|
ctx = &tile->sriov.vf.ccs[ctx_id];
|
||||||
ctx->ctx_id = ctx_id;
|
ctx->ctx_id = ctx_id;
|
||||||
|
|
||||||
migrate = xe_migrate_init(tile);
|
migrate = xe_migrate_alloc(tile);
|
||||||
if (IS_ERR(migrate)) {
|
if (IS_ERR(migrate)) {
|
||||||
err = PTR_ERR(migrate);
|
err = PTR_ERR(migrate);
|
||||||
goto err_ret;
|
goto err_ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err = xe_migrate_init(migrate);
|
||||||
|
if (err)
|
||||||
|
goto err_ret;
|
||||||
|
|
||||||
ctx->migrate = migrate;
|
ctx->migrate = migrate;
|
||||||
|
|
||||||
err = alloc_bb_pool(tile, ctx);
|
err = alloc_bb_pool(tile, ctx);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user