drm/xe: Move survivability back to xe

Commit d40f275d96 ("drm/xe: Move survivability entirely to xe_pci")
moved the survivability handling to be done entirely in the xe_pci
layer. However there are some issues with that approach:

1) Survivability mode needs at least the mmio initialized, otherwise it
   can't really read a register to decide if it should enter that state
2) SR-IOV mode should be initialized, otherwise it's not possible to
   check if it's a VF

Besides, as pointed out by Riana, the check for
xe_survivability_mode_enable() was wrong in xe_pci_probe() since it
does not return a bool.

Fix that by moving the initialization to be entirely in the xe_device
layer, with the correct dependencies handled: only after mmio and sriov
initialization, and not triggering it on error from
wait_for_lmem_ready(). This restores the trigger behavior before that
commit. The xe_pci layer now only checks for "is it enabled?",
like it's doing in xe_pci_suspend()/xe_pci_remove(), etc.

Cc: Riana Tauro <riana.tauro@intel.com>
Fixes: d40f275d96 ("drm/xe: Move survivability entirely to xe_pci")
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250314-fix-survivability-v5-1-fdb3559ea965@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
This commit is contained in:
Lucas De Marchi 2025-03-14 06:48:58 -07:00
parent 613256e67c
commit 86b5e0dbba
4 changed files with 34 additions and 19 deletions

View File

@ -53,6 +53,7 @@
#include "xe_pxp.h"
#include "xe_query.h"
#include "xe_shrinker.h"
#include "xe_survivability_mode.h"
#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
@ -711,8 +712,20 @@ int xe_device_probe_early(struct xe_device *xe)
sriov_update_device_info(xe);
err = xe_pcode_probe_early(xe);
if (err)
return err;
if (err) {
int save_err = err;
/*
* Try to leave the device in survivability mode if that is
* possible, but still return the previous error for error
* propagation
*/
err = xe_survivability_mode_enable(xe);
if (err)
return err;
return save_err;
}
err = wait_for_lmem_ready(xe);
if (err)

View File

@ -807,16 +807,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return err;
err = xe_device_probe_early(xe);
/*
* In Boot Survivability mode, no drm card is exposed and driver is
* loaded with bare minimum to allow for firmware to be flashed through
* mei. If early probe fails, check if survivability mode is flagged by
* HW to be enabled. In that case enable it and return success.
*/
if (err) {
if (xe_survivability_mode_required(xe) &&
xe_survivability_mode_enable(xe))
/*
* In Boot Survivability mode, no drm card is exposed and driver
* is loaded with bare minimum to allow for firmware to be
* flashed through mei. If early probe failed, but it managed to
* enable survivability mode, return success.
*/
if (xe_survivability_mode_is_enabled(xe))
return 0;
return err;

View File

@ -178,15 +178,16 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe)
return xe->survivability.mode;
}
/**
* xe_survivability_mode_required - checks if survivability mode is required
* @xe: xe device instance
/*
* survivability_mode_requested - check if survivability mode can be
* enabled and was requested by firmware
*
* This function reads the boot status from Pcode
* This function reads the boot status from Pcode.
*
* Return: true if boot status indicates failure, false otherwise
* Return: true if platform support is available and boot status indicates
* failure, false otherwise.
*/
bool xe_survivability_mode_required(struct xe_device *xe)
static bool survivability_mode_requested(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
@ -208,7 +209,8 @@ bool xe_survivability_mode_required(struct xe_device *xe)
*
* Initialize survivability information and enable survivability mode
*
* Return: 0 for success, negative error code otherwise.
* Return: 0 if survivability mode is enabled or not requested; negative error
* code otherwise.
*/
int xe_survivability_mode_enable(struct xe_device *xe)
{
@ -216,6 +218,9 @@ int xe_survivability_mode_enable(struct xe_device *xe)
struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
if (!survivability_mode_requested(xe))
return 0;
survivability->size = MAX_SCRATCH_MMIO;
info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),

View File

@ -12,6 +12,5 @@ struct xe_device;
int xe_survivability_mode_enable(struct xe_device *xe);
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
bool xe_survivability_mode_required(struct xe_device *xe);
#endif /* _XE_SURVIVABILITY_MODE_H_ */