drm/amd/ras: add check safety watermark func for pmfw eeprom

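Add a safety watermark check function for the PMFW EEPROM, and call it from the RAS manager and RAS core watermark checks when the firmware EEPROM is supported.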

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Gangliang Xie 2025-12-15 13:01:04 +08:00 committed by Alex Deucher
parent db217d08af
commit 5042806557
4 changed files with 37 additions and 0 deletions

View File

@ -571,6 +571,9 @@ bool amdgpu_ras_mgr_check_eeprom_safety_watermark(struct amdgpu_device *adev)
if (!amdgpu_ras_mgr_is_ready(adev))
return false;
if (ras_fw_eeprom_supported(ras_mgr->ras_core))
return ras_fw_eeprom_check_safety_watermark(ras_mgr->ras_core);
return ras_eeprom_check_safety_watermark(ras_mgr->ras_core);
}

View File

@ -563,6 +563,9 @@ bool ras_core_is_ready(struct ras_core_context *ras_core)
/*
 * ras_core_check_safety_watermark - run the EEPROM safety watermark check
 * @ras_core: RAS core context to check
 *
 * Picks the backend by asking ras_fw_eeprom_supported(): when it reports
 * true the firmware EEPROM check is used, otherwise the ras_eeprom check.
 *
 * Return: whatever the selected backend check returns.
 */
bool ras_core_check_safety_watermark(struct ras_core_context *ras_core)
{
	/* Only the selected backend is evaluated. */
	return ras_fw_eeprom_supported(ras_core) ?
		ras_fw_eeprom_check_safety_watermark(ras_core) :
		ras_eeprom_check_safety_watermark(ras_core);
}

View File

@ -190,3 +190,33 @@ int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core)
return res;
}
/*
 * ras_fw_eeprom_check_safety_watermark - apply the record threshold policy
 * @ras_core: RAS core context whose firmware EEPROM control is consulted
 *
 * Compares the bad page count reported by ras_umc_get_badpage_count()
 * against control->record_threshold_count. Nothing is logged and no state
 * is changed unless the count exceeds that threshold.
 *
 * Once the threshold is exceeded:
 *  - WARN_NONSTOP_OVER_THRESHOLD / NONSTOP_OVER_THRESHOLD: only warnings are
 *    emitted and false is returned.
 *  - any other non-zero config: ras_core->is_rma is set and true is returned.
 *
 * Return: true if the device was flagged RMA, false otherwise.
 */
bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core)
{
	struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
	int bad_page_count;

	/* Nothing to enforce when record_threshold_config is zero. */
	if (!control->record_threshold_config)
		return false;

	bad_page_count = ras_umc_get_badpage_count(ras_core);

	/*
	 * The policy below must only run past the watermark. Previously the
	 * unbraced threshold test guarded just the first warning, so is_rma
	 * could be set with the count still within the threshold.
	 */
	if (bad_page_count <= control->record_threshold_count)
		return false;

	RAS_DEV_WARN(ras_core->dev, "RAS records:%d exceed threshold:%d",
		     bad_page_count, control->record_threshold_count);

	if ((control->record_threshold_config == WARN_NONSTOP_OVER_THRESHOLD) ||
	    (control->record_threshold_config == NONSTOP_OVER_THRESHOLD)) {
		/* Non-stop policies: warn only, do not flag RMA. */
		RAS_DEV_WARN(ras_core->dev,
			"Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
		return false;
	}

	ras_core->is_rma = true;
	RAS_DEV_WARN(ras_core->dev,
		"Please consider adjusting the customized threshold.\n");
	return true;
}

View File

@ -67,5 +67,6 @@ int ras_fw_get_badpage_ipid(struct ras_core_context *ras_core,
int ras_fw_erase_ras_table(struct ras_core_context *ras_core,
uint32_t *result);
int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core);
/*
 * Evaluate the record threshold policy of the firmware EEPROM control.
 * Returns false when record_threshold_config is zero or selects one of the
 * NONSTOP_OVER_THRESHOLD policies; a true return means ras_core->is_rma
 * has been set.
 */
bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core);
#endif