drm/amdgpu: move reset debug disable handling

Move everything to the supported resets masks rather than
having an explicit misc checks for this.

Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher 2025-10-14 17:01:05 -04:00
parent b5e333e634
commit ad0a48e531
12 changed files with 32 additions and 22 deletions

View File

@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
/* attempt a per ring reset */
if (unlikely(adev->debug_disable_gpu_ring_reset)) {
dev_err(adev->dev, "Ring reset disabled by debug mask\n");
} else if (amdgpu_gpu_recovery &&
amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
ring->funcs->reset) {
if (amdgpu_gpu_recovery &&
amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
ring->funcs->reset) {
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);

View File

@ -468,9 +468,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
ktime_t deadline;
bool ret;
if (unlikely(ring->adev->debug_disable_soft_recovery))
return false;
deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)

View File

@ -4956,7 +4956,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
if (!amdgpu_sriov_vf(adev)) {
if (!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}

View File

@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(11, 0, 3):
if ((adev->gfx.me_fw_version >= 2280) &&
(adev->gfx.mec_fw_version >= 2410) &&
!amdgpu_sriov_vf(adev)) {
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
break;
default:
if (!amdgpu_sriov_vf(adev)) {
if (!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}

View File

@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(12, 0, 1):
if ((adev->gfx.me_fw_version >= 2660) &&
(adev->gfx.mec_fw_version >= 2920) &&
!amdgpu_sriov_vf(adev)) {
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}

View File

@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
if (!amdgpu_sriov_vf(adev))
if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);

View File

@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
if ((adev->gfx.mec_fw_version >= 155) &&
!amdgpu_sriov_vf(adev)) {
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}
break;
case IP_VERSION(9, 5, 0):
if ((adev->gfx.mec_fw_version >= 21) &&
!amdgpu_sriov_vf(adev)) {
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
}

View File

@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
if ((adev->gfx.mec_fw_version >= 0xb0) &&
amdgpu_dpm_reset_sdma_is_supported(adev) &&
!adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(9, 5, 0):
if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
if ((adev->gfx.mec_fw_version >= 0xf) &&
amdgpu_dpm_reset_sdma_is_supported(adev) &&
!adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:

View File

@ -1429,7 +1429,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 0, 2):
case IP_VERSION(5, 0, 5):
if ((adev->sdma.instance[0].fw_version >= 35) &&
!amdgpu_sriov_vf(adev))
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:

View File

@ -1348,12 +1348,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(5, 2, 3):
case IP_VERSION(5, 2, 4):
if ((adev->sdma.instance[0].fw_version >= 76) &&
!amdgpu_sriov_vf(adev))
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
case IP_VERSION(5, 2, 5):
if ((adev->sdma.instance[0].fw_version >= 34) &&
!amdgpu_sriov_vf(adev))
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:

View File

@ -1356,7 +1356,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
if ((adev->sdma.instance[0].fw_version >= 21) &&
!amdgpu_sriov_vf(adev))
!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
break;
default:

View File

@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->sdma.supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
if (!amdgpu_sriov_vf(adev))
if (!amdgpu_sriov_vf(adev) &&
!adev->debug_disable_gpu_ring_reset)
adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_sdma_sysfs_reset_mask_init(adev);