drm/amdgpu: add ras_eeprom_read_idx interface

PMFW manages the RAS EEPROM data by itself. Add a new interface that reads
EEPROM data via PMFW; a subset of the records can be read by specifying the
starting record index and the number of records.

v2: use IPID parse interface.
    pa is not used and set it to a fixed value.
v3: optimize the null pointer check for IPID parse interface.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Tao Zhou 2025-07-23 19:04:17 +08:00 committed by Alex Deucher
parent cd74132be8
commit 7f34ddf77d
2 changed files with 55 additions and 0 deletions

View File

@ -970,6 +970,50 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
return res;
}
/**
 * amdgpu_ras_eeprom_read_idx -- read bad page records by index via the SMU
 * @control: pointer to control structure
 * @record: array of records to fill; must hold at least @num entries
 * @rec_idx: index of the first record to read
 * @num: number of records to read
 *
 * For every index in [@rec_idx, @rec_idx + @num) the MCA address, IPID and
 * timestamp are queried through the amdgpu_ras_smu_* helpers and stored in
 * @record[index - @rec_idx]. The IPID is decoded with the UMC
 * mca_ipid_parse callback to fill in the memory channel and UMC instance.
 *
 * Return 0 on success, or when SMU-managed EEPROM is not supported (nothing
 * is read in that case); -EOPNOTSUPP when no IPID parse callback is
 * available; otherwise the error returned by the failing SMU query.
 */
int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
			       struct eeprom_table_record *record, u32 rec_idx,
			       const u32 num)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	uint64_t ts;
	u64 mca, ipid;
	u32 i;
	int ret;

	if (!amdgpu_ras_smu_eeprom_supported(adev))
		return 0;

	if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse)
		return -EOPNOTSUPP;

	/*
	 * Iterate over the output offset with an unsigned counter so the
	 * loop bound is never a signed/unsigned mixed comparison and never
	 * depends on a 32-bit rec_idx + num sum.
	 */
	for (i = 0; i < num; i++) {
		struct eeprom_table_record *rec = &record[i];
		u32 idx = rec_idx + i;
		/*
		 * The parse callback stores full 32-bit values. Let it write
		 * into properly sized temporaries instead of casting the
		 * addresses of the record fields to uint32_t *: if those
		 * fields are narrower than 32 bits, a direct store would
		 * overrun them and corrupt neighbouring data.
		 */
		uint32_t channel = 0, umc_inst = 0;

		ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, idx, &mca);
		if (ret)
			return ret;

		ret = amdgpu_ras_smu_get_badpage_ipid(adev, idx, &ipid);
		if (ret)
			return ret;

		ret = amdgpu_ras_smu_get_timestamp(adev, idx, &ts);
		if (ret)
			return ret;

		rec->address = mca;
		/* retired_page (pa) is unused now */
		rec->retired_page = 0x1ULL;
		rec->ts = ts;
		rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
		rec->cu = 0;

		adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
					      &channel, &umc_inst, NULL);
		rec->mem_channel = channel;
		rec->mcumc_id = umc_inst;
	}

	return 0;
}
/**
* amdgpu_ras_eeprom_read -- read EEPROM
* @control: pointer to control structure
@ -991,6 +1035,9 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
u8 *buf, *pp;
u32 g0, g1;
if (amdgpu_ras_smu_eeprom_supported(adev))
return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
if (!__is_ras_eeprom_supported(adev))
return 0;
@ -1162,6 +1209,10 @@ static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
int res = -EFAULT;
size_t data_len;
/* pmfw manages eeprom data by itself */
if (amdgpu_ras_smu_eeprom_supported(adev))
return 0;
mutex_lock(&control->ras_tbl_mutex);
/* We want *pos - data_len > 0, which means there's

View File

@ -186,6 +186,10 @@ int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
uint32_t *result);
/*
 * Fill @record with @num entries read by record index, starting at @rec_idx.
 * @record must have room for at least @num entries.
 */
int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *record, u32 rec_idx,
const u32 num);
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;