mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 00:53:34 +02:00
drm/panthor: Recover from panthor_gpu_flush_caches() failures
We have seen a few cases where the whole memory subsystem is blocked
and flush operations never complete. When that happens, we want to:
- schedule a reset, so we can recover from this situation
- reset the pending_reqs in the reset path, so we can send
new commands after the reset
- skip any panthor_gpu_flush_caches() operations queued after
the timeout and return -EIO directly, to avoid needless
waits (the memory block won't miraculously work again)
Note that we drop the WARN_ON()s because these hangs can be triggered
by buggy GPU jobs created by the UMD, and there's no way we can
prevent that. We do keep the error messages though.
v2:
- New patch
v3:
- Collect R-b
- Explicitly mention the fact that we dropped the WARN_ON()s in the
commit message
v4:
- No changes
Fixes: 5cd894e258 ("drm/panthor: Add the GPU logical block")
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patch.msgid.link/20251128084841.3804658-4-boris.brezillon@collabora.com
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
This commit is contained in:
parent
151df689fb
commit
3c0a60195b
|
|
@ -289,38 +289,42 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
|
|||
int panthor_gpu_flush_caches(struct panthor_device *ptdev,
|
||||
u32 l2, u32 lsc, u32 other)
|
||||
{
|
||||
bool timedout = false;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
/* Serialize cache flush operations. */
|
||||
guard(mutex)(&ptdev->gpu->cache_flush_lock);
|
||||
|
||||
spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
|
||||
if (!drm_WARN_ON(&ptdev->base,
|
||||
ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
|
||||
if (!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
|
||||
ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED;
|
||||
gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other));
|
||||
} else {
|
||||
ret = -EIO;
|
||||
}
|
||||
spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!wait_event_timeout(ptdev->gpu->reqs_acked,
|
||||
!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED),
|
||||
msecs_to_jiffies(100))) {
|
||||
spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
|
||||
if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 &&
|
||||
!(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED))
|
||||
timedout = true;
|
||||
ret = -ETIMEDOUT;
|
||||
else
|
||||
ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED;
|
||||
spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
|
||||
}
|
||||
|
||||
if (timedout) {
|
||||
if (ret) {
|
||||
panthor_device_schedule_reset(ptdev);
|
||||
drm_err(&ptdev->base, "Flush caches timeout");
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -360,6 +364,7 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev)
|
|||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
ptdev->gpu->pending_reqs = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user