mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 18:43:33 +02:00
drm/amdgpu: refine ras error injection when eeprom initialization failed
When EEPROM initialization fails, we still support RAS error injection and reserve bad pages, but do not save the bad pages to the EEPROM. Signed-off-by: ganglxie <ganglxie@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
0b7f13551e
commit
cfce8f4fa7
|
|
@ -3006,6 +3006,15 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (!con->eeprom_control.is_eeprom_valid) {
|
||||
dev_warn(adev->dev,
|
||||
"Failed to save EEPROM table data because of EEPROM data corruption!");
|
||||
if (new_cnt)
|
||||
*new_cnt = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
mutex_lock(&con->recovery_lock);
|
||||
control = &con->eeprom_control;
|
||||
data = con->eh_data;
|
||||
|
|
@ -3491,8 +3500,7 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
|
|||
|
||||
control = &con->eeprom_control;
|
||||
ret = amdgpu_ras_eeprom_init(control);
|
||||
if (ret)
|
||||
return ret;
|
||||
control->is_eeprom_valid = !ret;
|
||||
|
||||
if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
|
||||
control->ras_num_pa_recs = control->ras_num_recs;
|
||||
|
|
@ -3501,10 +3509,12 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
|
|||
adev->umc.ras->get_retire_flip_bits)
|
||||
adev->umc.ras->get_retire_flip_bits(adev);
|
||||
|
||||
if (control->ras_num_recs) {
|
||||
if (control->ras_num_recs && control->is_eeprom_valid) {
|
||||
ret = amdgpu_ras_load_bad_pages(adev);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (ret) {
|
||||
control->is_eeprom_valid = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
amdgpu_dpm_send_hbm_bad_pages_num(
|
||||
adev, control->ras_num_bad_pages);
|
||||
|
|
@ -3523,7 +3533,7 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
|
|||
dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n");
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
|
||||
|
|
|
|||
|
|
@ -114,6 +114,8 @@ struct amdgpu_ras_eeprom_control {
|
|||
/* Record channel info which occurred bad pages
|
||||
*/
|
||||
u32 bad_channel_bitmap;
|
||||
|
||||
bool is_eeprom_valid;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user