drm/amdgpu: Add debug mask to disable CE logs

Add debug mask to disable kernel logs of RAS correctable errors,
including both ACA and CE error counter kernel messages.

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Xiang Liu 2025-06-06 11:10:40 +08:00 committed by Alex Deucher
parent fb5ec2174d
commit f43411978d
4 changed files with 15 additions and 0 deletions

View File

@ -1282,6 +1282,7 @@ struct amdgpu_device {
bool debug_exp_resets;
bool debug_disable_gpu_ring_reset;
bool debug_vm_userptr;
bool debug_disable_ce_logs;
/* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;

View File

@ -115,6 +115,11 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
int i;
if (adev->debug_disable_ce_logs &&
bank->smu_err_type == ACA_SMU_TYPE_CE &&
!ACA_BANK_ERR_IS_DEFFERED(bank))
return;
RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)

View File

@ -144,6 +144,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
AMDGPU_DEBUG_SMU_POOL = BIT(7),
AMDGPU_DEBUG_VM_USERPTR = BIT(8),
AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@ -2278,6 +2279,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: VM mode debug for userptr is enabled\n");
adev->debug_vm_userptr = true;
}
if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
pr_info("debug: disable kernel logs of correctalbe errors\n");
adev->debug_disable_ce_logs = true;
}
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)

View File

@ -1107,6 +1107,9 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
err_info->de_count, blk_name);
}
} else {
if (adev->debug_disable_ce_logs)
return;
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;