mirror of
https://github.com/torvalds/linux.git
synced 2026-05-30 18:13:41 +02:00
drm/xe/xe_survivability: Redesign survivability mode
Redesign survivability mode to have only one value per file. 1) Retain the survivability_mode sysfs to indicate the type cat /sys/bus/pci/devices/0000\:03\:00.0/survivability_mode (Boot / Runtime) 2) Add survivability_info directory to expose boot breadcrumbs. Entries in survivability mode sysfs are only visible when boot breadcrumb registers are populated. /sys/bus/pci/devices/0000:03:00.0/survivability_info ├── aux_info0 ├── aux_info1 ├── aux_info2 ├── aux_info3 ├── aux_info4 ├── capability_info ├── postcode_trace └── postcode_trace_overflow Capability Info: Provides data about boot status and has bits that indicate the support for the other breadcrumbs Postcode Trace / Postcode Trace Overflow : Each postcode is represented as an 8-bit value and represents a boot failure event. When a new failure event is logged by Pcode the existing postcodes are shifted left. These entries provide a history of 8 postcodes. Auxiliary Info: Some failures have additional debug information. Signed-off-by: Riana Tauro <riana.tauro@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patch.msgid.link/20251208084539.3652902-5-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
This commit is contained in:
parent
a00e305fba
commit
f4e9fc967a
|
|
@ -19,8 +19,6 @@
|
|||
#include "xe_pcode_api.h"
|
||||
#include "xe_vsec.h"
|
||||
|
||||
#define MAX_SCRATCH_MMIO 8
|
||||
|
||||
/**
|
||||
* DOC: Survivability Mode
|
||||
*
|
||||
|
|
@ -48,19 +46,38 @@
|
|||
*
|
||||
* Refer :ref:`xe_configfs` for more details on how to use configfs
|
||||
*
|
||||
* Survivability mode is indicated by the below admin-only readable sysfs which provides additional
|
||||
* debug information::
|
||||
* Survivability mode is indicated by the below admin-only readable sysfs entry. It
|
||||
* provides information about the type of survivability mode (Boot/Runtime).
|
||||
*
|
||||
* /sys/bus/pci/devices/<device>/survivability_mode
|
||||
* .. code-block:: shell
|
||||
*
|
||||
* Capability Information:
|
||||
* Provides boot status
|
||||
* Postcode Information:
|
||||
* Provides information about the failure
|
||||
* Overflow Information
|
||||
* Provides history of previous failures
|
||||
* Auxiliary Information
|
||||
* Certain failures may have information in addition to postcode information
|
||||
* # cat /sys/bus/pci/devices/<device>/survivability_mode
|
||||
* Boot
|
||||
*
|
||||
*
|
||||
* Any additional debug information if present will be visible under the directory
|
||||
* ``survivability_info``::
|
||||
*
|
||||
* /sys/bus/pci/devices/<device>/survivability_info/
|
||||
* ├── aux_info0
|
||||
* ├── aux_info1
|
||||
* ├── aux_info2
|
||||
* ├── aux_info3
|
||||
* ├── aux_info4
|
||||
* ├── capability_info
|
||||
* ├── fdo_mode
|
||||
* ├── postcode_trace
|
||||
* └── postcode_trace_overflow
|
||||
*
|
||||
* This directory has the following attributes
|
||||
*
|
||||
* - ``capability_info`` : Indicates Boot status and support for additional information
|
||||
*
|
||||
* - ``postcode_trace``, ``postcode_trace_overflow`` : Each postcode is a 8bit value and
|
||||
* represents a boot failure event. When a new failure event is logged by PCODE the
|
||||
* existing postcodes are shifted left. These entries provide a history of 8 postcodes.
|
||||
*
|
||||
* - ``aux_info<n>`` : Some failures have additional debug information
|
||||
*
|
||||
* Runtime Survivability
|
||||
* =====================
|
||||
|
|
@ -68,60 +85,76 @@
|
|||
* Certain runtime firmware errors can cause the device to enter a wedged state
|
||||
* (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation.
|
||||
* Runtime Survivability Mode indicates that a firmware flash is necessary to recover the device and
|
||||
* is indicated by the presence of survivability mode sysfs::
|
||||
*
|
||||
* /sys/bus/pci/devices/<device>/survivability_mode
|
||||
*
|
||||
* is indicated by the presence of survivability mode sysfs.
|
||||
* Survivability mode sysfs provides information about the type of survivability mode.
|
||||
*
|
||||
* .. code-block:: shell
|
||||
*
|
||||
* # cat /sys/bus/pci/devices/<device>/survivability_mode
|
||||
* Runtime
|
||||
*
|
||||
* When such errors occur, userspace is notified with the drm device wedged uevent and runtime
|
||||
* survivability mode. User can then initiate a firmware flash using userspace tools like fwupd
|
||||
* to restore device to normal operation.
|
||||
*/
|
||||
|
||||
static const char * const reg_map[] = {
|
||||
[CAPABILITY_INFO] = "Capability Info",
|
||||
[POSTCODE_TRACE] = "Postcode trace",
|
||||
[POSTCODE_TRACE_OVERFLOW] = "Postcode trace overflow",
|
||||
[AUX_INFO0] = "Auxiliary Info 0",
|
||||
[AUX_INFO1] = "Auxiliary Info 1",
|
||||
[AUX_INFO2] = "Auxiliary Info 2",
|
||||
[AUX_INFO3] = "Auxiliary Info 3",
|
||||
[AUX_INFO4] = "Auxiliary Info 4",
|
||||
};
|
||||
|
||||
struct xe_survivability_attribute {
|
||||
struct device_attribute attr;
|
||||
u8 index;
|
||||
};
|
||||
|
||||
static struct
|
||||
xe_survivability_attribute *dev_attr_to_survivability_attr(struct device_attribute *attr)
|
||||
{
|
||||
return container_of(attr, struct xe_survivability_attribute, attr);
|
||||
}
|
||||
|
||||
static u32 aux_history_offset(u32 reg_value)
|
||||
{
|
||||
return REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, reg_value);
|
||||
}
|
||||
|
||||
static void set_survivability_info(struct xe_mmio *mmio, struct xe_survivability_info *info,
|
||||
int id, char *name)
|
||||
static void set_survivability_info(struct xe_mmio *mmio, u32 *info, int id)
|
||||
{
|
||||
strscpy(info[id].name, name, sizeof(info[id].name));
|
||||
info[id].reg = PCODE_SCRATCH(id).raw;
|
||||
info[id].value = xe_mmio_read32(mmio, PCODE_SCRATCH(id));
|
||||
info[id] = xe_mmio_read32(mmio, PCODE_SCRATCH(id));
|
||||
}
|
||||
|
||||
static void populate_survivability_info(struct xe_device *xe)
|
||||
{
|
||||
struct xe_survivability *survivability = &xe->survivability;
|
||||
struct xe_survivability_info *info = survivability->info;
|
||||
u32 *info = survivability->info;
|
||||
struct xe_mmio *mmio;
|
||||
u32 id = 0, reg_value;
|
||||
char name[NAME_MAX];
|
||||
int index;
|
||||
|
||||
mmio = xe_root_tile_mmio(xe);
|
||||
set_survivability_info(mmio, info, id, "Capability Info");
|
||||
reg_value = info[id].value;
|
||||
set_survivability_info(mmio, info, CAPABILITY_INFO);
|
||||
reg_value = info[CAPABILITY_INFO];
|
||||
|
||||
if (reg_value & HISTORY_TRACKING) {
|
||||
id++;
|
||||
set_survivability_info(mmio, info, id, "Postcode Info");
|
||||
set_survivability_info(mmio, info, POSTCODE_TRACE);
|
||||
|
||||
if (reg_value & OVERFLOW_SUPPORT) {
|
||||
id = REG_FIELD_GET(OVERFLOW_REG_OFFSET, reg_value);
|
||||
set_survivability_info(mmio, info, id, "Overflow Info");
|
||||
}
|
||||
if (reg_value & OVERFLOW_SUPPORT)
|
||||
set_survivability_info(mmio, info, POSTCODE_TRACE_OVERFLOW);
|
||||
}
|
||||
|
||||
if (reg_value & AUXINFO_SUPPORT) {
|
||||
id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value);
|
||||
|
||||
for (index = 0; id && reg_value; index++, reg_value = info[id].value,
|
||||
id = aux_history_offset(reg_value)) {
|
||||
snprintf(name, NAME_MAX, "Auxiliary Info %d", index);
|
||||
set_survivability_info(mmio, info, id, name);
|
||||
for (index = 0; id >= AUX_INFO0 && id < MAX_SCRATCH_REG; index++) {
|
||||
set_survivability_info(mmio, info, id);
|
||||
id = aux_history_offset(info[id]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -130,15 +163,14 @@ static void log_survivability_info(struct pci_dev *pdev)
|
|||
{
|
||||
struct xe_device *xe = pdev_to_xe_device(pdev);
|
||||
struct xe_survivability *survivability = &xe->survivability;
|
||||
struct xe_survivability_info *info = survivability->info;
|
||||
u32 *info = survivability->info;
|
||||
int id;
|
||||
|
||||
dev_info(&pdev->dev, "Survivability Boot Status : Critical Failure (%d)\n",
|
||||
survivability->boot_status);
|
||||
for (id = 0; id < MAX_SCRATCH_MMIO; id++) {
|
||||
if (info[id].reg)
|
||||
dev_info(&pdev->dev, "%s: 0x%x - 0x%x\n", info[id].name,
|
||||
info[id].reg, info[id].value);
|
||||
for (id = 0; id < MAX_SCRATCH_REG; id++) {
|
||||
if (info[id])
|
||||
dev_info(&pdev->dev, "%s: 0x%x\n", reg_map[id], info[id]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -156,43 +188,87 @@ static ssize_t survivability_mode_show(struct device *dev,
|
|||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct xe_device *xe = pdev_to_xe_device(pdev);
|
||||
struct xe_survivability *survivability = &xe->survivability;
|
||||
struct xe_survivability_info *info = survivability->info;
|
||||
int index = 0, count = 0;
|
||||
|
||||
count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n",
|
||||
survivability->type ? "Runtime" : "Boot");
|
||||
|
||||
if (!check_boot_failure(xe))
|
||||
return count;
|
||||
|
||||
for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
|
||||
if (info[index].reg)
|
||||
count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
|
||||
info[index].reg, info[index].value);
|
||||
}
|
||||
|
||||
return count;
|
||||
return sysfs_emit(buff, "%s\n", survivability->type ? "Runtime" : "Boot");
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_ADMIN_RO(survivability_mode);
|
||||
|
||||
static ssize_t survivability_info_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buff)
|
||||
{
|
||||
struct xe_survivability_attribute *sa = dev_attr_to_survivability_attr(attr);
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct xe_device *xe = pdev_to_xe_device(pdev);
|
||||
struct xe_survivability *survivability = &xe->survivability;
|
||||
u32 *info = survivability->info;
|
||||
|
||||
return sysfs_emit(buff, "0x%x\n", info[sa->index]);
|
||||
}
|
||||
|
||||
#define SURVIVABILITY_ATTR_RO(name, _index) \
|
||||
struct xe_survivability_attribute attr_##name = { \
|
||||
.attr = __ATTR(name, 0400, survivability_info_show, NULL), \
|
||||
.index = _index, \
|
||||
}
|
||||
|
||||
SURVIVABILITY_ATTR_RO(capability_info, CAPABILITY_INFO);
|
||||
SURVIVABILITY_ATTR_RO(postcode_trace, POSTCODE_TRACE);
|
||||
SURVIVABILITY_ATTR_RO(postcode_trace_overflow, POSTCODE_TRACE_OVERFLOW);
|
||||
SURVIVABILITY_ATTR_RO(aux_info0, AUX_INFO0);
|
||||
SURVIVABILITY_ATTR_RO(aux_info1, AUX_INFO1);
|
||||
SURVIVABILITY_ATTR_RO(aux_info2, AUX_INFO2);
|
||||
SURVIVABILITY_ATTR_RO(aux_info3, AUX_INFO3);
|
||||
SURVIVABILITY_ATTR_RO(aux_info4, AUX_INFO4);
|
||||
|
||||
static void xe_survivability_mode_fini(void *arg)
|
||||
{
|
||||
struct xe_device *xe = arg;
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
struct device *dev = &pdev->dev;
|
||||
|
||||
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
|
||||
device_remove_file(dev, &dev_attr_survivability_mode);
|
||||
}
|
||||
|
||||
static umode_t survivability_info_attrs_visible(struct kobject *kobj, struct attribute *attr,
|
||||
int idx)
|
||||
{
|
||||
struct xe_device *xe = kdev_to_xe_device(kobj_to_dev(kobj));
|
||||
struct xe_survivability *survivability = &xe->survivability;
|
||||
u32 *info = survivability->info;
|
||||
|
||||
if (info[idx])
|
||||
return 0400;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Attributes are ordered according to enum scratch_reg */
|
||||
static struct attribute *survivability_info_attrs[] = {
|
||||
&attr_capability_info.attr.attr,
|
||||
&attr_postcode_trace.attr.attr,
|
||||
&attr_postcode_trace_overflow.attr.attr,
|
||||
&attr_aux_info0.attr.attr,
|
||||
&attr_aux_info1.attr.attr,
|
||||
&attr_aux_info2.attr.attr,
|
||||
&attr_aux_info3.attr.attr,
|
||||
&attr_aux_info4.attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group survivability_info_group = {
|
||||
.name = "survivability_info",
|
||||
.attrs = survivability_info_attrs,
|
||||
.is_visible = survivability_info_attrs_visible,
|
||||
};
|
||||
|
||||
static int create_survivability_sysfs(struct pci_dev *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct xe_device *xe = pdev_to_xe_device(pdev);
|
||||
int ret;
|
||||
|
||||
/* create survivability mode sysfs */
|
||||
ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
|
||||
ret = device_create_file(dev, &dev_attr_survivability_mode);
|
||||
if (ret) {
|
||||
dev_warn(dev, "Failed to create survivability sysfs files\n");
|
||||
return ret;
|
||||
|
|
@ -203,6 +279,12 @@ static int create_survivability_sysfs(struct pci_dev *pdev)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (check_boot_failure(xe)) {
|
||||
ret = devm_device_add_group(dev, &survivability_info_group);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -239,25 +321,6 @@ static int enable_boot_survivability_mode(struct pci_dev *pdev)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int init_survivability_mode(struct xe_device *xe)
|
||||
{
|
||||
struct xe_survivability *survivability = &xe->survivability;
|
||||
struct xe_survivability_info *info;
|
||||
|
||||
survivability->size = MAX_SCRATCH_MMIO;
|
||||
|
||||
info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
|
||||
GFP_KERNEL);
|
||||
if (!info)
|
||||
return -ENOMEM;
|
||||
|
||||
survivability->info = info;
|
||||
|
||||
populate_survivability_info(xe);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_survivability_mode_is_boot_enabled- check if boot survivability mode is enabled
|
||||
* @xe: xe device instance
|
||||
|
|
@ -325,9 +388,7 @@ int xe_survivability_mode_runtime_enable(struct xe_device *xe)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = init_survivability_mode(xe);
|
||||
if (ret)
|
||||
return ret;
|
||||
populate_survivability_info(xe);
|
||||
|
||||
ret = create_survivability_sysfs(pdev);
|
||||
if (ret)
|
||||
|
|
@ -356,14 +417,11 @@ int xe_survivability_mode_boot_enable(struct xe_device *xe)
|
|||
{
|
||||
struct xe_survivability *survivability = &xe->survivability;
|
||||
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
|
||||
int ret;
|
||||
|
||||
if (!xe_survivability_mode_is_requested(xe))
|
||||
return 0;
|
||||
|
||||
ret = init_survivability_mode(xe);
|
||||
if (ret)
|
||||
return ret;
|
||||
populate_survivability_info(xe);
|
||||
|
||||
/* Log breadcrumbs but do not enter survivability mode for Critical boot errors */
|
||||
if (survivability->boot_status == CRITICAL_FAILURE) {
|
||||
|
|
|
|||
|
|
@ -9,23 +9,29 @@
|
|||
#include <linux/limits.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
enum scratch_reg {
|
||||
CAPABILITY_INFO,
|
||||
POSTCODE_TRACE,
|
||||
POSTCODE_TRACE_OVERFLOW,
|
||||
AUX_INFO0,
|
||||
AUX_INFO1,
|
||||
AUX_INFO2,
|
||||
AUX_INFO3,
|
||||
AUX_INFO4,
|
||||
MAX_SCRATCH_REG,
|
||||
};
|
||||
|
||||
enum xe_survivability_type {
|
||||
XE_SURVIVABILITY_TYPE_BOOT,
|
||||
XE_SURVIVABILITY_TYPE_RUNTIME,
|
||||
};
|
||||
|
||||
struct xe_survivability_info {
|
||||
char name[NAME_MAX];
|
||||
u32 reg;
|
||||
u32 value;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct xe_survivability: Contains survivability mode information
|
||||
*/
|
||||
struct xe_survivability {
|
||||
/** @info: struct that holds survivability info from scratch registers */
|
||||
struct xe_survivability_info *info;
|
||||
/** @info: survivability debug info */
|
||||
u32 info[MAX_SCRATCH_REG];
|
||||
|
||||
/** @size: number of scratch registers */
|
||||
u32 size;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user