drm/amdgpu/vcn: add a helper framework for engine resets

With engine resets, we reset all queues on the engine rather
than just a single queue.  Add a helper framework to handle this,
similar to the one already used for SDMA.

Reviewed-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Tested-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher 2025-06-16 16:01:25 -04:00
parent 3871149081
commit 7b6cde7f4e
2 changed files with 84 additions and 1 deletions

View File

@ -134,6 +134,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
mutex_init(&adev->vcn.inst[i].engine_reset_mutex);
atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
@ -1451,3 +1452,81 @@ int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
return ret;
}
/**
 * amdgpu_vcn_reset_engine - reset one VCN instance together with all its rings
 * @adev: amdgpu device that owns the VCN instance
 * @instance_id: index into adev->vcn.inst[] of the instance to reset
 *
 * The whole sequence runs with the instance's engine_reset_mutex held:
 * the scheduler work queues of the decode ring and of every encode ring are
 * stopped, the instance's ->reset() callback is invoked, each ring is run
 * through amdgpu_ring_test_ring(), amdgpu_fence_driver_force_completion() is
 * called on each ring and the work queues are started again.
 *
 * If ->reset() or any ring test fails, the remaining steps are skipped: the
 * fence force-completion and the work queue restart are NOT performed, and
 * the error is returned after the mutex has been dropped.
 *
 * Returns: 0 on success, or a negative error code on failure.
 */
static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev,
				   uint32_t instance_id)
{
	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id];
	int ret, idx;

	mutex_lock(&vinst->engine_reset_mutex);

	/* Unconditionally park the scheduler work queues of the dec ring and
	 * all enc rings for the duration of the reset.
	 */
	drm_sched_wqueue_stop(&vinst->ring_dec.sched);
	for (idx = 0; idx < vinst->num_enc_rings; idx++)
		drm_sched_wqueue_stop(&vinst->ring_enc[idx].sched);

	/* Reset the instance, then ring-test dec first and the enc rings in
	 * index order; the first failure ends the chain.
	 */
	ret = vinst->reset(vinst);
	if (!ret)
		ret = amdgpu_ring_test_ring(&vinst->ring_dec);
	for (idx = 0; !ret && idx < vinst->num_enc_rings; idx++)
		ret = amdgpu_ring_test_ring(&vinst->ring_enc[idx]);

	if (!ret) {
		/* Everything passed: force-complete the fences on every ring
		 * before any work queue is allowed to run again.
		 */
		amdgpu_fence_driver_force_completion(&vinst->ring_dec);
		for (idx = 0; idx < vinst->num_enc_rings; idx++)
			amdgpu_fence_driver_force_completion(&vinst->ring_enc[idx]);

		/* Restart the work queues that were stopped above. */
		drm_sched_wqueue_start(&vinst->ring_dec.sched);
		for (idx = 0; idx < vinst->num_enc_rings; idx++)
			drm_sched_wqueue_start(&vinst->ring_enc[idx].sched);
	}

	/* NOTE(review): on failure the work queues stay stopped; presumably
	 * the caller escalates to a heavier reset - confirm against callers.
	 */
	mutex_unlock(&vinst->engine_reset_mutex);

	return ret;
}
/**
 * amdgpu_vcn_ring_reset - ring reset helper built on a full engine reset
 * @ring: ring to reset; ring->me selects the VCN instance
 * @vmid: vmid of guilty job (not read by this helper)
 * @timedout_fence: fence of timed out job (not read by this helper)
 *
 * Intended for VCN blocks that do not use unified queues: there, resetting
 * the engine resets every queue of the instance, so the request is forwarded
 * to amdgpu_vcn_reset_engine() for the instance that owns @ring.  With
 * unified queues there is one queue per engine and this helper refuses the
 * request.
 *
 * Returns: -EOPNOTSUPP if AMDGPU_RESET_TYPE_PER_QUEUE is not set in
 * adev->vcn.supported_reset, -EINVAL if the instance uses a unified queue,
 * otherwise the result of amdgpu_vcn_reset_engine() (0 on success, or a
 * negative error code on failure).
 */
int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
			  unsigned int vmid,
			  struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int ret;

	if ((adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) == 0)
		ret = -EOPNOTSUPP;
	else if (adev->vcn.inst[ring->me].using_unified_queue)
		ret = -EINVAL;
	else
		ret = amdgpu_vcn_reset_engine(adev, ring->me);

	return ret;
}

View File

@ -330,7 +330,9 @@ struct amdgpu_vcn_inst {
struct dpg_pause_state *new_state);
int (*set_pg_state)(struct amdgpu_vcn_inst *vinst,
enum amd_powergating_state state);
int (*reset)(struct amdgpu_vcn_inst *vinst);
bool using_unified_queue;
struct mutex engine_reset_mutex;
};
struct amdgpu_vcn_ras {
@ -552,5 +554,7 @@ void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev);
int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/* Engine-level reset entry point for rings of VCN blocks without unified
 * queues.  The last parameter is the fence of the timed out job; its name
 * matches the definition and its kernel-doc.
 */
int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
			  unsigned int vmid,
			  struct amdgpu_fence *timedout_fence);
#endif