drm/amd/pm: implement ras_smu_drv interface for smu v13.0.12

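Implement the ras_smu_drv interface for SMU v13.0.12. It exposes SMU-message-based RAS EEPROM accessors (table version, bad page count, bad page MCA address and IPID, timestamp get/set, table erase) together with a feature-flags query.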

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Gangliang Xie 2025-09-12 12:43:35 +08:00 committed by Alex Deucher
parent 0c6f09e65b
commit 77dbd7c0a2
3 changed files with 156 additions and 0 deletions

View File

@ -503,6 +503,32 @@ struct ras_critical_region {
uint64_t size;
};
/*
 * RAS EEPROM table version, split into two 16-bit bit-fields that together
 * occupy one 32-bit unit.
 * NOTE(review): presumably meant to overlay the 32-bit value produced by
 * get_ras_table_version(); bit-field placement is ABI dependent - confirm.
 */
struct ras_eeprom_table_version {
uint32_t minor : 16;
uint32_t major : 16;
};
/*
 * Callback table for RAS EEPROM operations that are carried out by the SMU.
 * Every callback takes the owning device and returns an int status
 * (0 on success in the v13.0.12 implementation, otherwise the error returned
 * by the SMU message helper).
 */
struct ras_eeprom_smu_funcs {
/* Fetch the RAS table version as a raw 32-bit value into @table_version. */
int (*get_ras_table_version)(struct amdgpu_device *adev,
uint32_t *table_version);
/*
 * Fetch the number of bad pages into @count. @timeout bounds how long the
 * implementation may keep retrying (milliseconds in the v13.0.12 backend).
 */
int (*get_badpage_count)(struct amdgpu_device *adev, uint32_t *count, uint32_t timeout);
/* Fetch the 64-bit MCA address of the bad page record at @index. */
int (*get_badpage_mca_addr)(struct amdgpu_device *adev, uint16_t index, uint64_t *mca_addr);
/* Hand @timestamp to the SMU. */
int (*set_timestamp)(struct amdgpu_device *adev, uint64_t timestamp);
/* Fetch the timestamp associated with the record at @index. */
int (*get_timestamp)(struct amdgpu_device *adev,
uint16_t index, uint64_t *timestamp);
/* Fetch the 64-bit IPID of the bad page record at @index. */
int (*get_badpage_ipid)(struct amdgpu_device *adev, uint16_t index, uint64_t *ipid);
/* Request a RAS table erase; @result receives the value reported back. */
int (*erase_ras_table)(struct amdgpu_device *adev, uint32_t *result);
};
/*
 * Bit definitions for RAS features provided through the SMU.
 * NOTE(review): presumably these are the bits reported via
 * ras_smu_drv.ras_smu_feature_flags() - confirm against callers.
 */
enum ras_smu_feature_flags {
/* Bit 0: RAS EEPROM feature. */
RAS_SMU_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0),
};
/*
 * Per-SMU-version RAS driver descriptor: bundles the EEPROM callback table
 * with a query for the feature flags the backend reports.
 */
struct ras_smu_drv {
/* EEPROM operations implemented by this SMU backend. */
const struct ras_eeprom_smu_funcs *smu_eeprom_funcs;
/* Writes the backend's 64-bit feature flag mask into @flags. */
void (*ras_smu_feature_flags)(struct amdgpu_device *adev, uint64_t *flags);
};
struct amdgpu_ras {
void *ras_mgr;
/* ras infrastructure */

View File

@ -34,6 +34,7 @@
#include "amdgpu_fru_eeprom.h"
#include <linux/pci.h>
#include "smu_cmn.h"
#include "amdgpu_ras.h"
#undef MP1_Public
#undef smnMP1_FIRMWARE_FLAGS
@ -925,3 +926,131 @@ const struct smu_temp_funcs smu_v13_0_12_temp_funcs = {
.temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported,
.get_temp_metrics = smu_v13_0_12_get_temp_metrics,
};
/*
 * Query the RAS table version from the SMU.
 *
 * Sends SMU_MSG_GetRASTableVersion with a parameter of 0 and passes
 * @table_version as the message's output argument.
 *
 * Returns the status of smu_cmn_send_smc_msg_with_param().
 */
static int smu_v13_0_12_get_ras_table_version(struct amdgpu_device *adev,
					      uint32_t *table_version)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	int status;

	status = smu_cmn_send_smc_msg_with_param(smu_ctx,
						 SMU_MSG_GetRASTableVersion,
						 0, table_version);
	return status;
}
/*
 * Read the bad page count from the SMU, retrying while it reports -EBUSY.
 *
 * @count:   passed as the output argument of SMU_MSG_GetBadPageCount.
 * @timeout: retry budget in milliseconds, measured with ktime_get().
 *
 * The message is always sent at least once. Any status other than -EBUSY
 * (including success) is returned immediately. On -EBUSY the function waits
 * 10 ms and retries until the deadline has passed, then returns the last
 * status (-EBUSY).
 *
 * NOTE(review): mdelay() busy-waits; if every caller may sleep, a sleeping
 * delay would be friendlier - confirm the calling context before changing.
 */
static int smu_v13_0_12_get_badpage_count(struct amdgpu_device *adev, uint32_t *count,
					  uint32_t timeout)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	uint64_t deadline_ms;
	int status = 0;

	deadline_ms = (uint64_t)ktime_to_ms(ktime_get()) + timeout;

	for (;;) {
		status = smu_cmn_send_smc_msg_with_param(smu_ctx,
							 SMU_MSG_GetBadPageCount,
							 0, count);
		/* anything but "eeprom is not ready" ends the polling */
		if (status != -EBUSY)
			break;

		mdelay(10);

		/* the deadline is only evaluated after the delay */
		if ((uint64_t)ktime_to_ms(ktime_get()) >= deadline_ms)
			break;
	}

	return status;
}
/*
 * Send a timestamp to the SMU via SMU_MSG_SetTimestamp.
 *
 * @timestamp: value to send. The message parameter is passed as a 32-bit
 *             quantity, so only the low 32 bits of @timestamp are transmitted;
 *             the upper 32 bits are discarded.
 *
 * No output argument is requested from the message (NULL is passed).
 *
 * Returns the status of smu_cmn_send_smc_msg_with_param().
 */
static int smu_v13_0_12_set_timestamp(struct amdgpu_device *adev, uint64_t timestamp)
{
	struct smu_context *smu = adev->powerplay.pp_handle;

	/* use NULL, not a plain integer 0, for the unused output pointer */
	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetTimestamp,
					       (uint32_t)timestamp, NULL);
}
/*
 * Read the timestamp for entry @index from the SMU.
 *
 * Sends SMU_MSG_GetTimestamp with @index as the parameter. The message
 * yields a 32-bit value which, on success, is zero-extended into
 * *@timestamp. On failure *@timestamp is left untouched.
 *
 * Returns 0 on success or the error from smu_cmn_send_smc_msg_with_param().
 */
static int smu_v13_0_12_get_timestamp(struct amdgpu_device *adev,
				      uint16_t index, uint64_t *timestamp)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	uint32_t raw_stamp;
	int status;

	status = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetTimestamp,
						 index, &raw_stamp);
	if (status)
		return status;

	*timestamp = raw_stamp;
	return 0;
}
/*
 * Read the 64-bit IPID of bad page entry @index from the SMU.
 *
 * Two SMU_MSG_GetBadPageIpid messages are issued. The message parameter
 * carries @index in bits 15:0 and a selector in the bits above: selector 1
 * fetches the low 32 bits, selector 2 the high 32 bits.
 * NOTE(review): selector meaning is inferred from how the two results are
 * combined below - confirm against the SMU message definition.
 *
 * *@ipid is written only when both messages succeed.
 *
 * Returns 0 on success or the first error from
 * smu_cmn_send_smc_msg_with_param().
 */
static int smu_v13_0_12_get_badpage_ipid(struct amdgpu_device *adev,
					 uint16_t index, uint64_t *ipid)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	uint32_t selector, ipid_lo, ipid_hi;
	int status;

	selector = index | (1 << 16);
	status = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageIpid,
						 selector, &ipid_lo);
	if (status)
		return status;

	selector = index | (2 << 16);
	status = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageIpid,
						 selector, &ipid_hi);
	if (status)
		return status;

	*ipid = ((uint64_t)ipid_hi << 32) | ipid_lo;
	return 0;
}
/*
 * Ask the SMU to erase the RAS table.
 *
 * Sends SMU_MSG_EraseRasTable with a parameter of 0 and passes @result as
 * the message's output argument.
 *
 * Returns the status of smu_cmn_send_smc_msg_with_param().
 */
static int smu_v13_0_12_erase_ras_table(struct amdgpu_device *adev,
					uint32_t *result)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	int status;

	status = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_EraseRasTable,
						 0, result);
	return status;
}
/*
 * Read the 64-bit MCA address of bad page entry @index from the SMU.
 *
 * Two SMU_MSG_GetBadPageMcaAddr messages are issued. The message parameter
 * carries @index in bits 15:0 and a selector in the bits above: selector 1
 * fetches the low 32 bits, selector 2 the high 32 bits.
 * NOTE(review): selector meaning is inferred from how the two results are
 * combined below - confirm against the SMU message definition.
 *
 * *@mca_addr is written only when both messages succeed.
 *
 * Returns 0 on success or the first error from
 * smu_cmn_send_smc_msg_with_param().
 */
static int smu_v13_0_12_get_badpage_mca_addr(struct amdgpu_device *adev,
					     uint16_t index, uint64_t *mca_addr)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	uint32_t selector, addr_lo, addr_hi;
	int status;

	selector = index | (1 << 16);
	status = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageMcaAddr,
						 selector, &addr_lo);
	if (status)
		return status;

	selector = index | (2 << 16);
	status = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageMcaAddr,
						 selector, &addr_hi);
	if (status)
		return status;

	*mca_addr = ((uint64_t)addr_hi << 32) | addr_lo;
	return 0;
}
/*
 * RAS EEPROM callback table for SMU v13.0.12; each entry forwards to the
 * matching SMU-message helper defined in this file.
 */
static const struct ras_eeprom_smu_funcs smu_v13_0_12_eeprom_smu_funcs = {
.get_ras_table_version = smu_v13_0_12_get_ras_table_version,
.get_badpage_count = smu_v13_0_12_get_badpage_count,
.get_badpage_mca_addr = smu_v13_0_12_get_badpage_mca_addr,
.set_timestamp = smu_v13_0_12_set_timestamp,
.get_timestamp = smu_v13_0_12_get_timestamp,
.get_badpage_ipid = smu_v13_0_12_get_badpage_ipid,
.erase_ras_table = smu_v13_0_12_erase_ras_table,
};
/*
 * Report the RAS SMU feature flags for SMU v13.0.12.
 *
 * @adev:  unused by this implementation.
 * @flags: destination for the flag mask; may be NULL, in which case the
 *         call does nothing.
 *
 * The mask is currently always cleared to 0, i.e. no feature bit is set.
 */
static void smu_v13_0_12_ras_smu_feature_flags(struct amdgpu_device *adev, uint64_t *flags)
{
	if (flags)
		*flags = 0ULL;
}
/*
 * RAS SMU driver descriptor exported for SMU v13.0.12: ties the EEPROM
 * callback table to the feature-flags query above.
 */
const struct ras_smu_drv smu_v13_0_12_ras_smu_drv = {
.smu_eeprom_funcs = &smu_v13_0_12_eeprom_smu_funcs,
.ras_smu_feature_flags = smu_v13_0_12_ras_smu_feature_flags,
};

View File

@ -105,6 +105,7 @@ int smu_v13_0_12_get_npm_data(struct smu_context *smu,
/* Tables and descriptors exported by the SMU v13.0.12 implementation. */
extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[];
extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[];
extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs;
/* RAS SMU driver descriptor (EEPROM callbacks + feature flags query). */
extern const struct ras_smu_drv smu_v13_0_12_ras_smu_drv;
#if defined(SWSMU_CODE_LAYER_L2)
#include "smu_cmn.h"