drm/xe: Add callback support for driver remove

xe device probe uses devm cleanup in most places. However, there are a
few cases where this is not possible: when the driver interacts with
component add/del. In that case, the resource group would be cleaned up
while the entire device's resources are in the process of being cleaned
up. One example is xe_gsc_proxy and display, which use that to interact
with mei and audio.

Add a callback-based remove so the exception doesn't make the probe
use multiple error handling styles.

v2: Change internal API to mimic the devm API. This will make it easier
    to migrate in future when devm can be used.

Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
This commit is contained in:
Lucas De Marchi 2025-02-13 11:28:57 -08:00
parent 6884d20510
commit 776e3b502b
4 changed files with 88 additions and 1 deletions

View File

@ -65,6 +65,12 @@
#include <generated/xe_wa_oob.h>
/*
 * One remove callback queued by xe_device_add_action_or_reset() and linked on
 * xe_device.remove_action_list. Entries are freed with kfree() by
 * xe_device_call_remove_actions() once the callback has run.
 */
struct xe_device_remove_action {
/* Link in the device's remove_action_list */
struct list_head node;
/* Callback to invoke; it receives @data as its only argument */
void (*action)(void *);
/* Opaque pointer handed back to @action unchanged */
void *data;
};
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@ -746,6 +752,9 @@ int xe_device_probe(struct xe_device *xe)
u8 last_gt;
u8 id;
xe->probing = true;
INIT_LIST_HEAD(&xe->remove_action_list);
xe_pat_init_early(xe);
err = xe_sriov_init(xe);
@ -886,6 +895,8 @@ int xe_device_probe(struct xe_device *xe)
xe_vsec_init(xe);
xe->probing = false;
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
err_fini_display:
@ -907,6 +918,61 @@ int xe_device_probe(struct xe_device *xe)
return err;
}
/**
 * xe_device_call_remove_actions - Run and release the registered remove actions
 * @xe: xe device instance
 *
 * Walks @xe->remove_action_list starting at its head. For every entry the
 * stored callback is invoked with its data pointer, then the entry is unlinked
 * from the list and freed. Once the walk is done, @xe->probing is cleared.
 *
 * Meant to be called only from xe_pci and xe_device, either when the driver is
 * being removed or when a failed probe is being unwound.
 */
void xe_device_call_remove_actions(struct xe_device *xe)
{
	struct xe_device_remove_action *entry, *next;

	/* _safe variant: each entry is freed while the list is being walked */
	list_for_each_entry_safe(entry, next, &xe->remove_action_list, node) {
		void (*fn)(void *) = entry->action;
		void *arg = entry->data;

		fn(arg);
		list_del(&entry->node);
		kfree(entry);
	}

	xe->probing = false;
}
/**
 * xe_device_add_action_or_reset - Register a callback to run on driver removal
 * @xe: xe device instance
 * @action: Function to call when the device is removed
 * @data: Pointer passed to @action when it is called
 *
 * Queues @action on the device's list of remove callbacks, which run on device
 * remove before any dev or drm managed resources are released. Use this only
 * when the action ends up in component_del()/component_master_del(), as those
 * are not compatible with devres cleanup.
 *
 * The new entry is inserted at the head of the list, so callbacks run in the
 * reverse order of their registration. A warning is emitted if this is called
 * while @xe->probing is not set.
 *
 * Returns: 0 on success, or -ENOMEM if the entry could not be allocated, in
 * which case @action has already been called with @data.
 */
int xe_device_add_action_or_reset(struct xe_device *xe,
				  void (*action)(void *), void *data)
{
	struct xe_device_remove_action *entry;

	/* Actions are only expected to be registered while probing */
	drm_WARN_ON(&xe->drm, !xe->probing);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (entry) {
		INIT_LIST_HEAD(&entry->node);
		entry->action = action;
		entry->data = data;
		list_add(&entry->node, &xe->remove_action_list);

		return 0;
	}

	/* The action cannot be tracked: the "or_reset" part, run it right away */
	action(data);

	return -ENOMEM;
}
static void xe_device_remove_display(struct xe_device *xe)
{
xe_display_unregister(xe);
@ -932,6 +998,8 @@ void xe_device_remove(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_remove(gt);
xe_device_call_remove_actions(xe);
}
void xe_device_shutdown(struct xe_device *xe)

View File

@ -45,6 +45,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
const struct pci_device_id *ent);
int xe_device_probe_early(struct xe_device *xe);
int xe_device_probe(struct xe_device *xe);
int xe_device_add_action_or_reset(struct xe_device *xe,
void (*action)(void *), void *data);
void xe_device_call_remove_actions(struct xe_device *xe);
void xe_device_remove(struct xe_device *xe);
void xe_device_shutdown(struct xe_device *xe);

View File

@ -428,6 +428,20 @@ struct xe_device {
/** @tiles: device tiles */
struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
/**
* @remove_action_list: list of actions to execute on device remove.
* Use xe_device_add_action_or_reset() for that. Actions can only be added
* during probe and are executed during the call from PCI subsystem to
* remove the driver from the device.
*/
struct list_head remove_action_list;
/**
* @probing: cover the section in which @remove_action_list can be used
* to post cleaning actions
*/
bool probing;
/**
* @mem_access: keep track of memory access in the device, possibly
* triggering additional actions when they occur.

View File

@ -900,8 +900,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return err;
err = xe_device_probe(xe);
if (err)
if (err) {
xe_device_call_remove_actions(xe);
return err;
}
err = xe_pm_init(xe);
if (err)