mirror of
https://github.com/torvalds/linux.git
synced 2026-05-23 14:42:08 +02:00
drm/amdgpu: Introduce funcs for generating cper record
Introduce new functions that are used to generate cper ue or ce records. v2: return -ENOMEM instead of false v2: check return value of fill section function Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Xiang Liu <xiang.liu@amd.com> Reviewed-by: Yang Wang <keivnyang.wang@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
56316ee91b
commit
ad97840f95
|
|
@ -30,16 +30,6 @@
|
|||
|
||||
typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
|
||||
|
||||
/* A list of ACA banks bundled with a bank counter. */
struct aca_banks {
|
||||
/* presumably the number of entries linked on @list -- TODO confirm */
int nr_banks;
|
||||
/* list head; amdgpu_cper_generate_ce_records() walks it as struct aca_bank_node entries */
struct list_head list;
|
||||
};
|
||||
|
||||
/*
 * One (hwid, mcatype) pair; aca_hwid_mcatypes[] is a table of these.
 * NOTE(review): field meanings (hardware ID / MCA type) are inferred from
 * their names -- confirm against the ACA_BANK_HWID() definition.
 */
struct aca_hwip {
|
||||
int hwid;
|
||||
int mcatype;
|
||||
};
|
||||
|
||||
static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
|
||||
ACA_BANK_HWID(SMU, 0x01, 0x01),
|
||||
ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
|
||||
|
|
@ -111,7 +101,7 @@ static struct aca_regs_dump {
|
|||
{"STATUS", ACA_REG_IDX_STATUS},
|
||||
{"ADDR", ACA_REG_IDX_ADDR},
|
||||
{"MISC", ACA_REG_IDX_MISC0},
|
||||
{"CONFIG", ACA_REG_IDX_CONFG},
|
||||
{"CONFIG", ACA_REG_IDX_CONFIG},
|
||||
{"IPID", ACA_REG_IDX_IPID},
|
||||
{"SYND", ACA_REG_IDX_SYND},
|
||||
{"DESTAT", ACA_REG_IDX_DESTAT},
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ enum aca_reg_idx {
|
|||
ACA_REG_IDX_STATUS = 1,
|
||||
ACA_REG_IDX_ADDR = 2,
|
||||
ACA_REG_IDX_MISC0 = 3,
|
||||
ACA_REG_IDX_CONFG = 4,
|
||||
ACA_REG_IDX_CONFIG = 4,
|
||||
ACA_REG_IDX_IPID = 5,
|
||||
ACA_REG_IDX_SYND = 6,
|
||||
ACA_REG_IDX_DESTAT = 8,
|
||||
|
|
@ -114,6 +114,11 @@ enum aca_smu_type {
|
|||
ACA_SMU_TYPE_COUNT,
|
||||
};
|
||||
|
||||
/*
 * One (hwid, mcatype) pair describing an ACA hardware IP.
 * NOTE(review): field meanings (hardware ID / MCA type) are inferred from
 * their names -- confirm against the users of this struct.
 */
struct aca_hwip {
|
||||
int hwid;
|
||||
int mcatype;
|
||||
};
|
||||
|
||||
struct aca_bank {
|
||||
enum aca_error_type aca_err_type;
|
||||
enum aca_smu_type smu_err_type;
|
||||
|
|
@ -125,6 +130,11 @@ struct aca_bank_node {
|
|||
struct list_head node;
|
||||
};
|
||||
|
||||
/* A list of ACA banks bundled with a bank counter. */
struct aca_banks {
|
||||
/* presumably the number of entries linked on @list -- TODO confirm */
int nr_banks;
|
||||
/* list head; entries are iterated as struct aca_bank_node via its @node member */
struct list_head list;
|
||||
};
|
||||
|
||||
struct aca_bank_info {
|
||||
int die_id;
|
||||
int socket_id;
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <linux/list.h>
#include <linux/slab.h>
|
||||
#include "amdgpu.h"
|
||||
|
||||
static const guid_t MCE = CPER_NOTIFY_MCE;
|
||||
|
|
@ -257,6 +258,113 @@ struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
|
|||
return hdr;
|
||||
}
|
||||
|
||||
int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
|
||||
struct aca_bank *bank)
|
||||
{
|
||||
struct cper_hdr *fatal = NULL;
|
||||
struct cper_sec_crashdump_reg_data reg_data = { 0 };
|
||||
int ret;
|
||||
|
||||
fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
|
||||
if (!fatal) {
|
||||
dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
|
||||
reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
|
||||
reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
|
||||
reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
|
||||
reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
|
||||
reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
|
||||
reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
|
||||
reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
|
||||
|
||||
amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
|
||||
ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*TODO: commit the cper entry to cper ring */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
|
||||
enum aca_error_type aca_err_type)
|
||||
{
|
||||
switch (aca_err_type) {
|
||||
case ACA_ERROR_TYPE_UE:
|
||||
return CPER_SEV_FATAL;
|
||||
case ACA_ERROR_TYPE_CE:
|
||||
return CPER_SEV_NON_FATAL_CORRECTED;
|
||||
case ACA_ERROR_TYPE_DEFERRED:
|
||||
return CPER_SEV_NON_FATAL_UNCORRECTED;
|
||||
default:
|
||||
dev_err(adev->dev, "Unknown ACA error type!\n");
|
||||
return CPER_SEV_FATAL;
|
||||
}
|
||||
}
|
||||
|
||||
int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
|
||||
struct aca_banks *banks,
|
||||
uint16_t bank_count)
|
||||
{
|
||||
struct cper_hdr *corrected = NULL;
|
||||
enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
|
||||
uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
|
||||
struct aca_bank_node *node;
|
||||
struct aca_bank *bank;
|
||||
uint32_t i = 0;
|
||||
int ret;
|
||||
|
||||
corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
|
||||
if (!corrected) {
|
||||
dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Raise severity if any DE is detected in the ACA bank list */
|
||||
list_for_each_entry(node, &banks->list, node) {
|
||||
bank = &node->bank;
|
||||
if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
|
||||
sev = CPER_SEV_NON_FATAL_UNCORRECTED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
|
||||
|
||||
/* Combine CE and UE in cper record */
|
||||
list_for_each_entry(node, &banks->list, node) {
|
||||
bank = &node->bank;
|
||||
reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
|
||||
reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
|
||||
reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
|
||||
reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
|
||||
reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
|
||||
reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
|
||||
reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
|
||||
reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
|
||||
reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
|
||||
reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
|
||||
reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
|
||||
reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
|
||||
reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
|
||||
reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
|
||||
|
||||
ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
|
||||
amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
|
||||
reg_data, CPER_ACA_REG_COUNT);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*TODO: commit the cper entry to cper ring */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_cper_init(struct amdgpu_device *adev)
|
||||
{
|
||||
mutex_init(&adev->cper.cper_lock);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#define __AMDGPU_CPER_H__
|
||||
|
||||
#include "amd_cper.h"
|
||||
#include "amdgpu_aca.h"
|
||||
|
||||
#define CPER_MAX_ALLOWED_COUNT 0x1000
|
||||
#define HDR_LEN (sizeof(struct cper_hdr))
|
||||
|
|
@ -84,7 +85,13 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev
|
|||
struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
|
||||
enum amdgpu_cper_type type,
|
||||
uint16_t section_count);
|
||||
|
||||
/* Each UE must be encoded into its own cper entry: 1 UE, 1 cper */
|
||||
int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
|
||||
struct aca_bank *bank);
|
||||
/* CEs and DEs are combined into 1 cper entry */
|
||||
int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
|
||||
struct aca_banks *banks,
|
||||
uint16_t bank_count);
|
||||
int amdgpu_cper_init(struct amdgpu_device *adev);
|
||||
int amdgpu_cper_fini(struct amdgpu_device *adev);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user