mirror of
https://github.com/torvalds/linux.git
synced 2026-05-27 08:33:17 +02:00
drm/amdgpu: Add poison consumption handling for gfx v12_1
Add poison consumption handling for gfx v12_1. Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
c32606c8c6
commit
df1f11fe14
|
|
@ -463,6 +463,7 @@ struct amdgpu_gfx {
|
|||
struct amdgpu_irq_src cp_ecc_error_irq;
|
||||
struct amdgpu_irq_src sq_irq;
|
||||
struct amdgpu_irq_src rlc_gc_fed_irq;
|
||||
struct amdgpu_irq_src rlc_poison_irq;
|
||||
struct sq_work sq_work;
|
||||
|
||||
/* gfx status */
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@
|
|||
#include "v12_structs.h"
|
||||
#include "gfx_v12_1.h"
|
||||
#include "mes_v12_1.h"
|
||||
#include "amdgpu_ras_mgr.h"
|
||||
|
||||
#define GFX12_MEC_HPD_SIZE 2048
|
||||
#define NUM_SIMD_PER_CU_GFX12_1 4
|
||||
|
|
@ -1184,6 +1185,13 @@ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
/* RLC POISON Error */
|
||||
r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_RLC,
|
||||
GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT,
|
||||
&adev->gfx.rlc_poison_irq);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
|
||||
|
||||
r = gfx_v12_1_rlc_init(adev);
|
||||
|
|
@ -3778,6 +3786,35 @@ static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v12_1_rlc_poison_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
uint32_t rlc_fed_status = 0;
|
||||
uint32_t ras_blk = RAS_BLOCK_ID__GFX;
|
||||
struct ras_ih_info ih_info = {0};
|
||||
int i, num_xcc;
|
||||
|
||||
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
|
||||
for (i = 0; i < num_xcc; i++)
|
||||
rlc_fed_status |= RREG32(SOC15_REG_OFFSET(GC,
|
||||
GET_INST(GC, i), regRLC_RLCS_FED_STATUS));
|
||||
|
||||
if (!rlc_fed_status)
|
||||
return 0;
|
||||
|
||||
if (REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA0_FED_ERR) ||
|
||||
REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA1_FED_ERR))
|
||||
ras_blk = RAS_BLOCK_ID__SDMA;
|
||||
|
||||
dev_warn(adev->dev, "RLC %d FED IRQ\n", ras_blk);
|
||||
|
||||
ih_info.block = ras_blk;
|
||||
ih_info.reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
|
||||
amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring)
|
||||
{
|
||||
const unsigned int gcr_cntl =
|
||||
|
|
@ -3902,6 +3939,10 @@ static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = {
|
|||
.process = gfx_v12_1_priv_inst_irq,
|
||||
};
|
||||
|
||||
static const struct amdgpu_irq_src_funcs gfx_v12_1_rlc_poison_irq_funcs = {
|
||||
.process = gfx_v12_1_rlc_poison_irq,
|
||||
};
|
||||
|
||||
static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
|
||||
|
|
@ -3912,6 +3953,9 @@ static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev)
|
|||
|
||||
adev->gfx.priv_inst_irq.num_types = 1;
|
||||
adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs;
|
||||
|
||||
adev->gfx.rlc_poison_irq.num_types = 1;
|
||||
adev->gfx.rlc_poison_irq.funcs = &gfx_v12_1_rlc_poison_irq_funcs;
|
||||
}
|
||||
|
||||
static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
|
||||
#include "kfd_smi_events.h"
|
||||
#include "kfd_debug.h"
|
||||
#include "amdgpu_ras_mgr.h"
|
||||
|
||||
/*
|
||||
* GFX12.1 SQ Interrupts
|
||||
|
|
@ -185,6 +186,7 @@ static void event_interrupt_poison_consumption_v12_1(struct kfd_node *node,
|
|||
enum amdgpu_ras_block block = 0;
|
||||
int ret = -EINVAL;
|
||||
uint32_t reset = 0;
|
||||
u64 event_id = RAS_EVENT_INVALID_ID;
|
||||
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
|
||||
|
||||
if (!p)
|
||||
|
|
@ -220,7 +222,15 @@ static void event_interrupt_poison_consumption_v12_1(struct kfd_node *node,
|
|||
* resetting queue passes, do page retirement without gpu reset
|
||||
* resetting queue fails, fallback to gpu reset solution
|
||||
*/
|
||||
amdgpu_amdkfd_ras_poison_consumption_handler(node->adev, block, reset);
|
||||
if (amdgpu_uniras_enabled(node->adev))
|
||||
event_id = amdgpu_ras_mgr_gen_ras_event_seqno(node->adev,
|
||||
RAS_SEQNO_TYPE_POISON_CONSUMPTION);
|
||||
|
||||
RAS_EVENT_LOG(node->adev, event_id,
|
||||
"poison is consumed by source %d, kick off gpu reset flow\n", source_id);
|
||||
|
||||
amdgpu_amdkfd_ras_pasid_poison_consumption_handler(node->adev,
|
||||
block, pasid, NULL, NULL, reset);
|
||||
}
|
||||
|
||||
static bool event_interrupt_isr_v12_1(struct kfd_node *node,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user