drm/xe/xe_survivability: Refactor survivability mode

Refactor survivability mode code to support both boot
and runtime survivability.

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Link: https://lore.kernel.org/r/20250826063419.3022216-6-riana.tauro@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
This commit is contained in:
Riana Tauro 2025-08-26 12:04:12 +05:30 committed by Rodrigo Vivi
parent 60439ac3f2
commit 41ff795aff
No known key found for this signature in database
GPG Key ID: FA625F640EEB13CA
6 changed files with 81 additions and 33 deletions

View File

@ -743,7 +743,7 @@ int xe_device_probe_early(struct xe_device *xe)
* possible, but still return the previous error for error
* propagation
*/
err = xe_survivability_mode_enable(xe);
err = xe_survivability_mode_boot_enable(xe);
if (err)
return err;

View File

@ -197,7 +197,7 @@ int xe_heci_gsc_init(struct xe_device *xe)
if (ret)
return ret;
if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) {
if (!def->use_polling && !xe_survivability_mode_is_boot_enabled(xe)) {
ret = heci_gsc_irq_setup(xe);
if (ret)
return ret;

View File

@ -784,7 +784,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
if (IS_SRIOV_PF(xe))
xe_pci_sriov_configure(pdev, 0);
if (xe_survivability_mode_is_enabled(xe))
if (xe_survivability_mode_is_boot_enabled(xe))
return;
xe_device_remove(xe);
@ -866,7 +866,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* flashed through mei. Return success, if survivability mode
* is enabled due to pcode failure or configfs being set
*/
if (xe_survivability_mode_is_enabled(xe))
if (xe_survivability_mode_is_boot_enabled(xe))
return 0;
if (err)
@ -960,7 +960,7 @@ static int xe_pci_suspend(struct device *dev)
struct xe_device *xe = pdev_to_xe_device(pdev);
int err;
if (xe_survivability_mode_is_enabled(xe))
if (xe_survivability_mode_is_boot_enabled(xe))
return -EBUSY;
err = xe_pm_suspend(xe);

View File

@ -121,6 +121,14 @@ static void log_survivability_info(struct pci_dev *pdev)
}
}
/*
 * check_boot_failure - report whether the cached boot status is a failure
 * @xe: xe device instance
 *
 * Only inspects the boot_status value already stored in xe->survivability.
 *
 * Return: true if boot_status is NON_CRITICAL_FAILURE or CRITICAL_FAILURE,
 * false otherwise.
 */
static bool check_boot_failure(struct xe_device *xe)
{
	struct xe_survivability *survivability = &xe->survivability;

	/* Both failure severities count; callers distinguish them afterwards. */
	return survivability->boot_status == NON_CRITICAL_FAILURE ||
	       survivability->boot_status == CRITICAL_FAILURE;
}
static ssize_t survivability_mode_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
@ -130,6 +138,11 @@ static ssize_t survivability_mode_show(struct device *dev,
struct xe_survivability_info *info = survivability->info;
int index = 0, count = 0;
count += sysfs_emit_at(buff, count, "Survivability mode type: Boot\n");
if (!check_boot_failure(xe))
return count;
for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
if (info[index].reg)
count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
@ -151,12 +164,11 @@ static void xe_survivability_mode_fini(void *arg)
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
}
static int enable_survivability_mode(struct pci_dev *pdev)
static int create_survivability_sysfs(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
int ret = 0;
int ret;
/* create survivability mode sysfs */
ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
@ -170,6 +182,20 @@ static int enable_survivability_mode(struct pci_dev *pdev)
if (ret)
return ret;
return 0;
}
static int enable_boot_survivability_mode(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
int ret = 0;
ret = create_survivability_sysfs(pdev);
if (ret)
return ret;
/* Make sure xe_heci_gsc_init() knows about survivability mode */
survivability->mode = true;
@ -192,15 +218,36 @@ static int enable_survivability_mode(struct pci_dev *pdev)
return ret;
}
/*
 * init_survivability_mode - allocate and fill the survivability info table
 * @xe: xe device instance
 *
 * Sets the table size to MAX_SCRATCH_MMIO, allocates that many zeroed
 * entries with devm_kcalloc() against xe->drm.dev, stores the table in
 * xe->survivability.info and then calls populate_survivability_info().
 *
 * Return: 0 on success, -ENOMEM if the allocation fails.
 */
static int init_survivability_mode(struct xe_device *xe)
{
	struct xe_survivability *surv = &xe->survivability;
	struct xe_survivability_info *table;

	surv->size = MAX_SCRATCH_MMIO;

	table = devm_kcalloc(xe->drm.dev, surv->size, sizeof(*table),
			     GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	/* Publish the table only once the allocation is known to be good. */
	surv->info = table;
	populate_survivability_info(xe);

	return 0;
}
/**
* xe_survivability_mode_is_enabled - check if survivability mode is enabled
* xe_survivability_mode_is_boot_enabled - check if boot survivability mode is enabled
* @xe: xe device instance
*
* Returns true if in survivability mode, false otherwise
* Returns true if in boot survivability mode, else false
*/
bool xe_survivability_mode_is_enabled(struct xe_device *xe)
bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe)
{
return xe->survivability.mode;
struct xe_survivability *survivability = &xe->survivability;
return survivability->mode && survivability->type == XE_SURVIVABILITY_TYPE_BOOT;
}
/**
@ -241,44 +288,38 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
return survivability->boot_status == NON_CRITICAL_FAILURE ||
survivability->boot_status == CRITICAL_FAILURE;
return check_boot_failure(xe);
}
/**
* xe_survivability_mode_enable - Initialize and enable the survivability mode
* xe_survivability_mode_boot_enable - Initialize and enable boot survivability mode
* @xe: xe device instance
*
* Initialize survivability information and enable survivability mode
* Initialize survivability information and enable boot survivability mode
*
* Return: 0 if survivability mode is enabled or not requested; negative error
* Return: 0 if boot survivability mode is enabled or not requested, negative error
* code otherwise.
*/
int xe_survivability_mode_enable(struct xe_device *xe)
int xe_survivability_mode_boot_enable(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
int ret;
if (!xe_survivability_mode_is_requested(xe))
return 0;
survivability->size = MAX_SCRATCH_MMIO;
ret = init_survivability_mode(xe);
if (ret)
return ret;
info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
GFP_KERNEL);
if (!info)
return -ENOMEM;
survivability->info = info;
populate_survivability_info(xe);
/* Only log debug information and exit if it is a critical failure */
/* Log breadcrumbs but do not enter survivability mode for Critical boot errors */
if (survivability->boot_status == CRITICAL_FAILURE) {
log_survivability_info(pdev);
return -ENXIO;
}
return enable_survivability_mode(pdev);
survivability->type = XE_SURVIVABILITY_TYPE_BOOT;
return enable_boot_survivability_mode(pdev);
}

View File

@ -10,8 +10,8 @@
struct xe_device;
int xe_survivability_mode_enable(struct xe_device *xe);
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
int xe_survivability_mode_boot_enable(struct xe_device *xe);
bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe);
bool xe_survivability_mode_is_requested(struct xe_device *xe);
#endif /* _XE_SURVIVABILITY_MODE_H_ */

View File

@ -9,6 +9,10 @@
#include <linux/limits.h>
#include <linux/types.h>
/**
 * enum xe_survivability_type - kind of survivability mode
 * @XE_SURVIVABILITY_TYPE_BOOT: boot survivability mode
 */
enum xe_survivability_type {
	XE_SURVIVABILITY_TYPE_BOOT = 0,
};
struct xe_survivability_info {
char name[NAME_MAX];
u32 reg;
@ -30,6 +34,9 @@ struct xe_survivability {
/** @mode: boolean to indicate survivability mode */
bool mode;
/** @type: survivability type */
enum xe_survivability_type type;
};
#endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */