drm/imagination: Clear runtime PM errors while resetting the GPU

The runtime PM might be left in an error state if one of the callbacks
returned an error, e.g. if the (auto)suspend callback failed following
a firmware crash.

When that happens, any further attempt to acquire or release a power
reference will then also fail, making it impossible to do anything else
with the GPU. The driver logic will eventually reach the reset code.

In pvr_power_reset(), replace pvr_power_get() with a new API
pvr_power_get_clear() which also attempts to clear any runtime PM error
state if acquiring a power reference is not possible.

Signed-off-by: Alessio Belle <alessio.belle@imgtec.com>
Reviewed-by: Matt Coster <matt.coster@imgtec.com>
Link: https://lore.kernel.org/r/20250624-clear-rpm-errors-gpu-reset-v1-1-b8ff2ae55aac@imgtec.com
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
This commit is contained in:
Alessio Belle 2025-06-24 16:01:31 +01:00 committed by Matt Coster
parent c03ea34cbf
commit 551507e0d0
No known key found for this signature in database
GPG Key ID: 79BC19F3D9DE6AB0

View File

@ -340,6 +340,63 @@ pvr_power_device_idle(struct device *dev)
return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY;
}
/**
 * pvr_power_clear_error() - Attempt to clear a runtime PM error on the device
 * @pvr_dev: Device pointer
 *
 * Runtime PM is disabled while the status the device currently reports
 * (suspended or active) is set again, and is re-enabled afterwards whether or
 * not that succeeded. A failure to set the status is logged.
 *
 * Returns:
 *  * 0 on success, or
 *  * Any error code returned by pm_runtime_set_suspended() or
 *    pm_runtime_set_active().
 */
static int
pvr_power_clear_error(struct pvr_device *pvr_dev)
{
	struct device *dev = from_pvr_device(pvr_dev)->dev;
	int ret;

	/* Make sure the device state is known and stays put from here on. */
	pm_runtime_disable(dev);

	/* Setting the status the device already has is the attempt to clear the error. */
	ret = pm_runtime_status_suspended(dev) ? pm_runtime_set_suspended(dev) :
						 pm_runtime_set_active(dev);
	if (ret) {
		drm_err(from_pvr_device(pvr_dev),
			"%s: Failed to clear runtime PM error (new error %d)\n",
			__func__, ret);
	}

	/* Always pair the disable above, even when the status update failed. */
	pm_runtime_enable(dev);

	return ret;
}
/**
 * pvr_power_get_clear() - Acquire a power reference, correcting any errors
 * @pvr_dev: Device pointer
 *
 * Try to take a power reference on the device. Should that fail, log a
 * warning, attempt to clear the runtime PM error state and, only if the
 * clearing succeeded, try once more to take the reference.
 *
 * Returns:
 *  * 0 on success, or
 *  * Any error code returned by pvr_power_get() or the runtime PM API.
 */
static int
pvr_power_get_clear(struct pvr_device *pvr_dev)
{
	int ret = pvr_power_get(pvr_dev);

	/* Common case: the reference was acquired at the first attempt. */
	if (!ret)
		return 0;

	drm_warn(from_pvr_device(pvr_dev),
		 "%s: pvr_power_get returned error %d, attempting recovery\n",
		 __func__, ret);

	/* A single retry, and only when the error state could be cleared. */
	ret = pvr_power_clear_error(pvr_dev);
	if (!ret)
		ret = pvr_power_get(pvr_dev);

	return ret;
}
/**
* pvr_power_reset() - Reset the GPU
* @pvr_dev: Device pointer
@ -364,7 +421,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
* Take a power reference during the reset. This should prevent any interference with the
* power state during reset.
*/
WARN_ON(pvr_power_get(pvr_dev));
WARN_ON(pvr_power_get_clear(pvr_dev));
down_write(&pvr_dev->reset_sem);