drm/amd/ras: add read func for pmfw eeprom

add read func for pmfw eeprom, and adapt address converting
for bad pages loaded from pmfw eeprom

v2: change label 'Out' to 'out'

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Gangliang Xie 2025-12-15 14:19:34 +08:00 committed by Alex Deucher
parent 3972f41bc1
commit 42c46be2ec
6 changed files with 101 additions and 9 deletions

View File

@ -241,6 +241,7 @@ struct ras_bank_ecc {
uint64_t status;
uint64_t ipid;
uint64_t addr;
uint64_t ts;
};
struct ras_bank_ecc_node {

View File

@ -241,7 +241,10 @@ static int ras_core_eeprom_recovery(struct ras_core_context *ras_core)
int count;
int ret;
count = ras_eeprom_get_record_count(ras_core);
if (ras_fw_eeprom_supported(ras_core))
count = ras_fw_eeprom_get_record_count(ras_core);
else
count = ras_eeprom_get_record_count(ras_core);
if (!count)
return 0;

View File

@ -259,3 +259,73 @@ int ras_fw_eeprom_append(struct ras_core_context *ras_core,
mutex_unlock(&control->ras_tbl_mutex);
return 0;
}
int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
struct eeprom_umc_record *record_umc,
struct ras_bank_ecc *ras_ecc,
u32 rec_idx, const u32 num)
{
struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
int i, ret, end_idx;
u64 mca, ipid, ts;
if (!ras_core->ras_umc.ip_func ||
!ras_core->ras_umc.ip_func->mca_ipid_parse)
return -EOPNOTSUPP;
mutex_lock(&control->ras_tbl_mutex);
end_idx = rec_idx + num;
for (i = rec_idx; i < end_idx; i++) {
ret = ras_fw_get_badpage_mca_addr(ras_core, i, &mca);
if (ret)
goto out;
ret = ras_fw_get_badpage_ipid(ras_core, i, &ipid);
if (ret)
goto out;
ret = ras_fw_get_timestamp(ras_core, i, &ts);
if (ret)
goto out;
if (record_umc) {
record_umc[i - rec_idx].address = mca;
/* retired_page (pa) is unused now */
record_umc[i - rec_idx].retired_row_pfn = 0x1ULL;
record_umc[i - rec_idx].ts = ts;
record_umc[i - rec_idx].err_type = RAS_EEPROM_ERR_NON_RECOVERABLE;
ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core, ipid,
(uint32_t *)&(record_umc[i - rec_idx].cu),
(uint32_t *)&(record_umc[i - rec_idx].mem_channel),
(uint32_t *)&(record_umc[i - rec_idx].mcumc_id), NULL);
/* update bad channel bitmap */
if ((record_umc[i - rec_idx].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
!(control->bad_channel_bitmap & (1 << record_umc[i - rec_idx].mem_channel))) {
control->bad_channel_bitmap |= 1 << record_umc[i - rec_idx].mem_channel;
control->update_channel_flag = true;
}
}
if (ras_ecc) {
ras_ecc[i - rec_idx].addr = mca;
ras_ecc[i - rec_idx].ipid = ipid;
ras_ecc[i - rec_idx].ts = ts;
}
}
out:
mutex_unlock(&control->ras_tbl_mutex);
return ret;
}
uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core)
{
if (!ras_core)
return 0;
return ras_core->ras_fw_eeprom.ras_num_recs;
}

View File

@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core);
bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core);
int ras_fw_eeprom_append(struct ras_core_context *ras_core,
struct eeprom_umc_record *record, const u32 num);
int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
struct eeprom_umc_record *record_umc,
struct ras_bank_ecc *ras_ecc,
u32 rec_idx, const u32 num);
uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core);
#endif

View File

@ -448,17 +448,27 @@ int ras_umc_load_bad_pages(struct ras_core_context *ras_core)
uint32_t ras_num_recs;
int ret;
ras_num_recs = ras_eeprom_get_record_count(ras_core);
/* no bad page record, skip eeprom access */
if (!ras_num_recs ||
ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
return 0;
if (ras_fw_eeprom_supported(ras_core)) {
ras_num_recs = ras_fw_eeprom_get_record_count(ras_core);
/* no bad page record, skip eeprom access */
if (!ras_num_recs ||
ras_core->ras_fw_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
return 0;
} else {
ras_num_recs = ras_eeprom_get_record_count(ras_core);
if (!ras_num_recs ||
ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
return 0;
}
bps = kzalloc_objs(*bps, ras_num_recs);
if (!bps)
return -ENOMEM;
ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
if (ras_fw_eeprom_supported(ras_core))
ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs);
else
ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
if (ret) {
RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table records!");
} else {
@ -486,7 +496,10 @@ static int ras_umc_save_bad_pages(struct ras_core_context *ras_core)
if (!data->bps)
return 0;
eeprom_record_num = ras_eeprom_get_record_count(ras_core);
if (ras_fw_eeprom_supported(ras_core))
eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core);
else
eeprom_record_num = ras_eeprom_get_record_count(ras_core);
mutex_lock(&ras_umc->umc_lock);
save_count = data->count - eeprom_record_num;
/* only new entries are saved */

View File

@ -413,7 +413,7 @@ static int umc_v12_0_eeprom_record_to_nps_record(struct ras_core_context *ras_co
uint64_t pa = 0;
int ret = 0;
if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) {
if (nps == EEPROM_RECORD_UMC_NPS_MODE(record) && !ras_fw_eeprom_supported(ras_core)) {
record->cur_nps_retired_row_pfn = EEPROM_RECORD_UMC_ADDR_PFN(record);
} else {
ret = convert_eeprom_record_to_nps_addr(ras_core,