mirror of
https://github.com/torvalds/linux.git
synced 2026-05-23 22:52:19 +02:00
drm/amd/ras: adapt page retirement process for pmfw eeprom
Read bad page data from the PMFW EEPROM when page retirement is triggered, and use the timestamp read from the EEPROM.

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
42c46be2ec
commit
72289903a2
|
|
@ -234,16 +234,27 @@ static int aca_log_bad_bank(struct ras_core_context *ras_core,
|
|||
bank_ecc->de_count) {
|
||||
struct ras_bank_ecc ras_ecc = {0};
|
||||
|
||||
ras_ecc.nps = ras_core_get_curr_nps_mode(ras_core);
|
||||
ras_ecc.addr = bank_ecc->bank_info.addr;
|
||||
ras_ecc.ipid = bank_ecc->bank_info.ipid;
|
||||
ras_ecc.status = bank_ecc->bank_info.status;
|
||||
ras_ecc.seq_no = bank->seq_no;
|
||||
if (ras_fw_eeprom_supported(ras_core)) {
|
||||
ret = ras_fw_eeprom_update_record(ras_core, &ras_ecc);
|
||||
if (!ret) {
|
||||
ras_ecc.nps = ras_core_get_curr_nps_mode(ras_core);
|
||||
ras_ecc.status = bank_ecc->bank_info.status;
|
||||
ras_ecc.seq_no = bank->seq_no;
|
||||
}
|
||||
} else {
|
||||
ras_ecc.nps = ras_core_get_curr_nps_mode(ras_core);
|
||||
ras_ecc.addr = bank_ecc->bank_info.addr;
|
||||
ras_ecc.ipid = bank_ecc->bank_info.ipid;
|
||||
ras_ecc.status = bank_ecc->bank_info.status;
|
||||
ras_ecc.seq_no = bank->seq_no;
|
||||
}
|
||||
|
||||
if (ras_core_gpu_in_reset(ras_core))
|
||||
ras_umc_log_bad_bank_pending(ras_core, &ras_ecc);
|
||||
else
|
||||
ras_umc_log_bad_bank(ras_core, &ras_ecc);
|
||||
if (!ret) {
|
||||
if (ras_core_gpu_in_reset(ras_core))
|
||||
ras_umc_log_bad_bank_pending(ras_core, &ras_ecc);
|
||||
else
|
||||
ras_umc_log_bad_bank(ras_core, &ras_ecc);
|
||||
}
|
||||
}
|
||||
|
||||
aca_report_ecc_info(ras_core,
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
#include "ras.h"
|
||||
|
||||
#define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */
|
||||
|
||||
void ras_fw_init_feature_flags(struct ras_core_context *ras_core)
|
||||
{
|
||||
struct ras_mp1 *mp1 = &ras_core->ras_mp1;
|
||||
|
|
@ -329,3 +331,41 @@ uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core)
|
|||
|
||||
return ras_core->ras_fw_eeprom.ras_num_recs;
|
||||
}
|
||||
|
||||
int ras_fw_eeprom_update_record(struct ras_core_context *ras_core,
|
||||
struct ras_bank_ecc *ras_ecc)
|
||||
{
|
||||
struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
|
||||
int ret, retry = 20;
|
||||
u32 recs_num_new = control->ras_num_recs;
|
||||
|
||||
do {
|
||||
/* 1000ms timeout is long enough, smu_get_badpage_count won't
|
||||
* return -EBUSY before timeout.
|
||||
*/
|
||||
ret = ras_fw_get_badpage_count(ras_core,
|
||||
&recs_num_new, RAS_SMU_MESSAGE_TIMEOUT_MS);
|
||||
if (!ret &&
|
||||
(recs_num_new == control->ras_num_recs)) {
|
||||
/* record number update in PMFW needs some time,
|
||||
* smu_get_badpage_count may return immediately without
|
||||
* count update, sleep for a while and retry again.
|
||||
*/
|
||||
msleep(50);
|
||||
retry--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} while (retry);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (recs_num_new > control->ras_num_recs)
|
||||
ret = ras_fw_eeprom_read_idx(ras_core, 0,
|
||||
ras_ecc, control->ras_num_recs, 1);
|
||||
else
|
||||
ret = -EINVAL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -75,5 +75,7 @@ int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
|
|||
struct ras_bank_ecc *ras_ecc,
|
||||
u32 rec_idx, const u32 num);
|
||||
uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core);
|
||||
int ras_fw_eeprom_update_record(struct ras_core_context *ras_core,
|
||||
struct ras_bank_ecc *ras_ecc);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -373,6 +373,9 @@ static int umc_v12_0_bank_to_eeprom_record(struct ras_core_context *ras_core,
|
|||
ACA_ADDR_2_ERR_ADDR(bank->addr), ACA_IPID_2_UMC_INST(bank->ipid),
|
||||
&nps_addr, bank->nps, record);
|
||||
|
||||
if (ras_fw_eeprom_supported(ras_core) && bank->ts)
|
||||
record->ts = bank->ts;
|
||||
|
||||
lookup_bad_pages_in_a_row(ras_core, record,
|
||||
bank->nps, NULL, 0, bank->seq_no, true);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user