drm/amdgpu: Add msg handlers for SRIOV RAS Telemetry

Add message handlers for RAS telemetry.

Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Victor Skvortsov 2024-10-30 09:45:27 -04:00 committed by Alex Deucher
parent 60c58d72af
commit 9928509dfc
2 changed files with 15 additions and 2 deletions

View File

@ -95,6 +95,7 @@ struct amdgpu_virt_ops {
void (*ras_poison_handler)(struct amdgpu_device *adev,
enum amdgpu_ras_block block);
bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
int (*req_ras_err_count)(struct amdgpu_device *adev);
};
/*

View File

@ -61,15 +61,18 @@ static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
enum idh_event event)
{
int r = 0;
u32 reg;
reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
if (reg != event)
if (reg == IDH_FAIL)
r = -EINVAL;
else if (reg != event)
return -ENOENT;
xgpu_nv_mailbox_send_ack(adev);
return 0;
return r;
}
static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
@ -178,6 +181,9 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
if (data1 != 0)
event = IDH_RAS_POISON_READY;
break;
case IDH_REQ_RAS_ERROR_COUNT:
event = IDH_RAS_ERROR_COUNT_READY;
break;
default:
break;
}
@ -456,6 +462,11 @@ static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev)
return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
}
static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
{
return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
}
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
@ -466,4 +477,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.trans_msg = xgpu_nv_mailbox_trans_msg,
.ras_poison_handler = xgpu_nv_ras_poison_handler,
.rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
.req_ras_err_count = xgpu_nv_req_ras_err_count,
};