amd/amdgpu: improve VF recovery time

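1. Change AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT from 30 to 5.
2. Set the fatal error detected (FED) flag when the VF2PF update retry
   limit is reached, and clear it once the VF GPU reset succeeds.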

Signed-off-by: Zhigang Luo <Zhigang.Luo@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Zhigang Luo 2024-03-20 10:40:27 -04:00 committed by Alex Deucher
parent dfb15c4ab5
commit d1999b4017
3 changed files with 3 additions and 1 deletions

View File

@ -4994,6 +4994,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
r = amdgpu_virt_reset_gpu(adev);
if (r)
return r;
amdgpu_ras_set_fed(adev, false);
amdgpu_irq_gpu_reset_resume_helper(adev);
/* some sw clean up VF needs to do before recover */

View File

@ -598,6 +598,7 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
adev->virt.vf2pf_update_retry_cnt++;
if ((adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) {
amdgpu_ras_set_fed(adev, true);
if (amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->virt.flr_work))
return;

View File

@ -52,7 +52,7 @@
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 30
#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 5
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,