drm/amdgpu: Update boot time errors polling sequence

Update the boot-time error polling sequence to align with
the latest firmware change.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Hawking Zhang 2024-01-29 20:29:08 +08:00 committed by Alex Deucher
parent c3ec8c4f9a
commit 1731ba9b64
2 changed files with 18 additions and 1 deletions

View File

@ -4120,6 +4120,18 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
u32 reg_data;
int retry_loop;
reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
aqua_vanjaram_encode_ext_smn_addressing(instance);
for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) {
*boot_error = AMDGPU_RAS_BOOT_SUCEESS;
return 0;
}
msleep(1);
}
/* The pattern for SMN addressing in other SoCs could be different from
* the one for aqua_vanjaram. We should revisit the code if the pattern
* is changed. In such a case, replace the aqua_vanjaram implementation
@ -4127,7 +4139,7 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
aqua_vanjaram_encode_ext_smn_addressing(instance);
for (retry_loop = 0; retry_loop < 1000; retry_loop++) {
for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) {
*boot_error = reg_data;

View File

@ -46,6 +46,11 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13)
#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31)
/* Boot status polling constants used by amdgpu_ras_wait_for_boot_complete(). */

/* Upper bound on the number of iterations of each boot status polling loop. */
#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000
/*
 * Value that the masked boot status register read is compared against;
 * on a match the caller reports AMDGPU_RAS_BOOT_SUCEESS and returns 0.
 */
#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
/* Selects the low 8 bits of the register value before the comparison above. */
#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
/*
 * Value stored in *boot_error when the steady status is observed
 * (only bit 31 set).
 * NOTE(review): presumably chosen so that AMDGPU_RAS_GPU_ERR_BOOT_STATUS()
 * evaluates true for it - confirm against AMDGPU_GET_REG_FIELD.
 * NOTE(review): the name is misspelled ("SUCEESS"); renaming it requires
 * updating every user in the same change.
 */
#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
/* position of instance value in sub_block_index of
* ta_ras_trigger_error_input, the sub block uses lower 12 bits