mirror of
https://github.com/torvalds/linux.git
synced 2026-06-02 03:24:19 +02:00
drm/amdgpu: set an error on all fences from a bad context
When we back up ring contents to re-emit after a queue reset,
we don't back up ring contents from the bad context. When
we signal the fences, we should set an error on the fences
from that bad context as well.
v2: misc cleanups
v3: add locking for fence error, fix comment (Christian)
v4: fix wrap around, locking (Christian)
Fixes: 77cc0da39c ("drm/amdgpu: track ring state associated with a fence")
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
1f22fcb88b
commit
ff780f4f80
|
|
@@ -758,11 +758,42 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
|
|||
* @fence: fence of the ring to signal
|
||||
*
|
||||
*/
|
||||
void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence)
|
||||
void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
|
||||
{
|
||||
dma_fence_set_error(&fence->base, -ETIME);
|
||||
amdgpu_fence_write(fence->ring, fence->seq);
|
||||
amdgpu_fence_process(fence->ring);
|
||||
struct dma_fence *unprocessed;
|
||||
struct dma_fence __rcu **ptr;
|
||||
struct amdgpu_fence *fence;
|
||||
struct amdgpu_ring *ring = af->ring;
|
||||
unsigned long flags;
|
||||
u32 seq, last_seq;
|
||||
|
||||
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
|
||||
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
|
||||
|
||||
/* mark all fences from the guilty context with an error */
|
||||
spin_lock_irqsave(&ring->fence_drv.lock, flags);
|
||||
do {
|
||||
last_seq++;
|
||||
last_seq &= ring->fence_drv.num_fences_mask;
|
||||
|
||||
ptr = &ring->fence_drv.fences[last_seq];
|
||||
rcu_read_lock();
|
||||
unprocessed = rcu_dereference(*ptr);
|
||||
|
||||
if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
|
||||
fence = container_of(unprocessed, struct amdgpu_fence, base);
|
||||
|
||||
if (fence == af)
|
||||
dma_fence_set_error(&fence->base, -ETIME);
|
||||
else if (fence->context == af->context)
|
||||
dma_fence_set_error(&fence->base, -ECANCELED);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
} while (last_seq != seq);
|
||||
spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
|
||||
/* signal the guilty fence */
|
||||
amdgpu_fence_write(ring, af->seq);
|
||||
amdgpu_fence_process(ring);
|
||||
}
|
||||
|
||||
void amdgpu_fence_save_wptr(struct dma_fence *fence)
|
||||
|
|
|
|||
|
|
@@ -811,7 +811,7 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
/* signal the fence of the bad job */
|
||||
/* signal the guilty fence and set an error on all fences from the context */
|
||||
if (guilty_fence)
|
||||
amdgpu_fence_driver_guilty_force_completion(guilty_fence);
|
||||
/* Re-emit the non-guilty commands */
|
||||
|
|
|
|||
|
|
@@ -155,7 +155,7 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops;
|
|||
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
|
||||
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
|
||||
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
|
||||
void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence);
|
||||
void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
|
||||
void amdgpu_fence_save_wptr(struct dma_fence *fence);
|
||||
|
||||
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user