drm/amd/ras: add pmfw eeprom smu interfaces

Add SMU interfaces and their data structures for
PMFW EEPROM in uniras.

v2: add 'const' to smu messages array, and specify
    index for each member when initializing.

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Gangliang Xie 2025-12-12 14:16:17 +08:00 committed by Alex Deucher
parent 45c692a177
commit 689b03a0a2
2 changed files with 64 additions and 0 deletions

View File

@ -28,6 +28,16 @@
#define RAS_MP1_MSG_QueryValidMcaCeCount 0x3A
#define RAS_MP1_MSG_McaBankCeDumpDW 0x3B
/*
 * Lookup table from enum ras_fw_eeprom_cmd to the SMU message type passed to
 * amdgpu_smu_ras_send_msg(). Each slot is initialized by explicit index, so
 * the mapping does not depend on the order of the entries below.
 */
static const enum smu_message_type pmfw_eeprom_msgs[] = {
[RAS_SMU_GetRASTableVersion] = SMU_MSG_GetRASTableVersion,
[RAS_SMU_GetBadPageCount] = SMU_MSG_GetBadPageCount,
[RAS_SMU_SetTimestamp] = SMU_MSG_SetTimestamp,
[RAS_SMU_GetTimestamp] = SMU_MSG_GetTimestamp,
[RAS_SMU_GetBadPageIpid] = SMU_MSG_GetBadPageIpid,
[RAS_SMU_EraseRasTable] = SMU_MSG_EraseRasTable,
[RAS_SMU_GetBadPageMcaAddr] = SMU_MSG_GetBadPageMcaAddr,
};
static int mp1_v13_0_get_valid_bank_count(struct ras_core_context *ras_core,
u32 msg, u32 *count)
{
@ -87,8 +97,44 @@ static int mp1_v13_0_dump_valid_bank(struct ras_core_context *ras_core,
return ret;
}
/*
 * Send a PMFW EEPROM command to the SMU.
 *
 * @ras_core: RAS core context; ras_core->dev is used as the amdgpu device.
 * @index:    command selector, used as an index into pmfw_eeprom_msgs[].
 * @param:    argument forwarded unchanged to amdgpu_smu_ras_send_msg().
 * @read_arg: forwarded unchanged to amdgpu_smu_ras_send_msg()
 *            (presumably receives the firmware reply -- confirm in that helper).
 *
 * Returns -EINVAL when @index does not name an entry of pmfw_eeprom_msgs[],
 * -RAS_CORE_GPU_IN_MODE1_RESET when the reset-domain semaphore cannot be
 * taken for read, otherwise the return value of amdgpu_smu_ras_send_msg().
 */
static int mp1_v13_0_eeprom_send_msg(struct ras_core_context *ras_core,
		enum ras_fw_eeprom_cmd index, uint32_t param, uint32_t *read_arg)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
	int ret;

	/*
	 * Reject selectors outside the table before subscripting it; an
	 * unchecked index would read past the array and hand an arbitrary
	 * message ID to the firmware. The unsigned cast also catches
	 * negative values if the enum is represented as a signed type.
	 */
	if ((unsigned int)index >=
	    sizeof(pmfw_eeprom_msgs) / sizeof(pmfw_eeprom_msgs[0]))
		return -EINVAL;

	/* Non-blocking: do not wait if the reset-domain semaphore is held for write. */
	if (!down_read_trylock(&adev->reset_domain->sem))
		return -RAS_CORE_GPU_IN_MODE1_RESET;

	ret = amdgpu_smu_ras_send_msg(adev, pmfw_eeprom_msgs[index],
				      param, read_arg);
	up_read(&adev->reset_domain->sem);

	return ret;
}
/*
 * Report which firmware RAS features are enabled.
 *
 * @ras_core:     RAS core context; ras_core->dev is used as the amdgpu device.
 * @enabled_mask: bit mask updated in place. Bits are only OR-ed in, never
 *                cleared, so the value the caller passes in is preserved.
 *
 * RAS_CORE_FW_FEATURE_BIT__RAS_EEPROM is set when
 * amdgpu_smu_ras_feature_is_enabled() reports SMU_FEATURE_HROM_EN_BIT.
 *
 * Returns 0 on success, or -RAS_CORE_GPU_IN_MODE1_RESET when the reset-domain
 * semaphore cannot be taken for read (in which case the mask is untouched).
 */
static int mp1_v13_0_get_ras_enabled_mask(struct ras_core_context *ras_core,
		uint64_t *enabled_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;

	/* Non-blocking attempt; bail out instead of waiting for the semaphore. */
	if (!down_read_trylock(&adev->reset_domain->sem))
		return -RAS_CORE_GPU_IN_MODE1_RESET;

	if (amdgpu_smu_ras_feature_is_enabled(adev, SMU_FEATURE_HROM_EN_BIT))
		*enabled_mask |= RAS_CORE_FW_FEATURE_BIT__RAS_EEPROM;

	up_read(&adev->reset_domain->sem);

	return 0;
}
/*
 * ras_mp1_sys_func callback table wiring each hook to its mp1_v13_0_*
 * implementation in this file.
 */
const struct ras_mp1_sys_func amdgpu_ras_mp1_sys_func_v13_0 = {
.mp1_get_valid_bank_count = mp1_v13_0_get_valid_bank_count,
.mp1_dump_valid_bank = mp1_v13_0_dump_valid_bank,
.mp1_send_eeprom_msg = mp1_v13_0_eeprom_send_msg,
.mp1_get_ras_enabled_mask = mp1_v13_0_get_ras_enabled_mask,
};

View File

@ -49,6 +49,10 @@
#define GPU_RESET_CAUSE_FATAL (RAS_CORE_RESET_GPU | 0x0002)
#define GPU_RESET_CAUSE_RMA (RAS_CORE_RESET_GPU | 0x0004)
/*
 * Firmware feature flags, one bit per feature. These are OR-ed into the
 * 64-bit mask filled in by the mp1_get_ras_enabled_mask callback.
 */
enum ras_core_fw_feature_flags {
RAS_CORE_FW_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0),
};
enum ras_block_id {
RAS_BLOCK_ID__UMC = 0,
RAS_BLOCK_ID__SDMA,
@ -127,6 +131,16 @@ enum ras_gpu_status {
RAS_GPU_STATUS__IS_VF = 0x8,
};
/*
 * Command selectors accepted by the mp1_send_eeprom_msg callback.
 * Values are consecutive starting at 0; the MP1 v13.0 backend uses them as
 * array indices into its SMU message table, so keep them dense.
 */
enum ras_fw_eeprom_cmd {
RAS_SMU_GetRASTableVersion = 0,
RAS_SMU_GetBadPageCount,
RAS_SMU_SetTimestamp,
RAS_SMU_GetTimestamp,
RAS_SMU_GetBadPageIpid,
RAS_SMU_EraseRasTable,
RAS_SMU_GetBadPageMcaAddr,
};
struct ras_core_context;
struct ras_bank_ecc;
struct ras_umc;
@ -141,6 +155,10 @@ struct ras_mp1_sys_func {
u32 msg, u32 *count);
int (*mp1_dump_valid_bank)(struct ras_core_context *ras_core,
u32 msg, u32 idx, u32 reg_idx, u64 *val);
int (*mp1_send_eeprom_msg)(struct ras_core_context *ras_core,
enum ras_fw_eeprom_cmd index, uint32_t param, uint32_t *read_arg);
int (*mp1_get_ras_enabled_mask)(struct ras_core_context *ras_core,
uint64_t *enabled_mask);
};
struct ras_eeprom_sys_func {