From 4e696906e9a82d4cab75f3083fabd65433c77e20 Mon Sep 17 00:00:00 2001 From: Xuemei Liu Date: Thu, 29 May 2025 10:25:11 +0800 Subject: [PATCH 01/12] drm/amdkfd: enable kfd on RISCV systems KFD has been confirmed that can run on RISCV systems. It's necessary to support CONFIG_HSA_AMD on RISCV. Signed-off-by: Xuemei Liu Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index d3c3d3ab7225..62e88e5362e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -5,7 +5,7 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64) + depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT)) select HMM_MIRROR select MMU_NOTIFIER select DRM_AMDGPU_USERPTR From 719d84f8a812608fc0f7be18a96d7dee96eaf3ba Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 29 May 2025 15:57:44 +0530 Subject: [PATCH 02/12] drm/amdgpu: Add more checks to discovery fetch Add more checks for valid vram size and log error, if any. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 9e738fae2b74..a0e9bf9b2710 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -270,9 +270,10 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { + bool sz_valid = true; uint64_t vram_size; - u32 msg; int i, ret = 0; + u32 msg; if (!amdgpu_sriov_vf(adev)) { /* It can take up to a second for IFWI init to complete on some dGPUs, @@ -291,9 +292,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, } } - vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; + vram_size = RREG32(mmRCC_CONFIG_MEMSIZE); + if (!vram_size || vram_size == U32_MAX) + sz_valid = false; + else + vram_size <<= 20; - if (vram_size) { + if (sz_valid) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, adev->mman.discovery_tmr_size, false); @@ -301,6 +306,11 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary); } + if (ret) + dev_err(adev->dev, + "failed to read discovery info from memory, vram size read: %llx", + vram_size); + return ret; } From d26625d034fb8d596f0488472969493fa02d03f3 Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Tue, 6 May 2025 16:45:33 -0400 Subject: [PATCH 03/12] drm/amdgpu/gfx10: Refine Cleaner Shader for GFX10.1.10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates the cleaner shader, which is responsible for initializing GPU resources such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Changes include adjustments to register clearing and shader configuration. - Updated GPU resource initialization addresses in the cleaner shader from `be803080` to `be803000`. - Simplified the logic in the SGPR clearing section, ensuring all SGPRs are set to zero. Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10") Cc: Christian König Cc: Alex Deucher Signed-off-by: Manu Rastogi Signed-off-by: Vitaly Prosyak Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h | 6 +++--- .../drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h index 5255378af53c..f67569ccf9f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = { 0xd70f6a01, 0x000202ff, 0x00000400, 0x80828102, 0xbf84fff7, 0xbefc03ff, - 0x00000068, 0xbe803080, - 0xbe813080, 0xbe823080, - 0xbe833080, 0x80fc847c, + 0x00000068, 0xbe803000, + 0xbe813000, 0xbe823000, + 0xbe833000, 0x80fc847c, 0xbf84fffa, 0xbeea0480, 0xbeec0480, 0xbeee0480, 0xbef00480, 0xbef20480, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm index 9ba3359253c9..54f7ed9e2801 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -40,7 +40,6 @@ shader main type(CS) wave_size(32) // Note: original source code from SQ team - // // Create 32 waves in a threadgroup (CS waves) // Each allocates 64 VGPRs @@ -71,8 +70,8 @@ label_0005: s_sub_u32 s2, s2, 8 s_cbranch_scc0 label_0005 // - s_mov_b32 s2, 0x80000000 // Bit31 is first_wave - s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup // CLEAR LDS // @@ -99,10 +98,10 @@ label_001F: label_0023: s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) label_sgpr_loop: - s_movreld_b32 s0, 0 - s_movreld_b32 s1, 0 - s_movreld_b32 s2, 0 - s_movreld_b32 s3, 0 + s_movreld_b32 s0, s0 + s_movreld_b32 s1, s0 + s_movreld_b32 s2, s0 + s_movreld_b32 s3, s0 s_sub_u32 m0, m0, 4 s_cbranch_scc0 label_sgpr_loop From 5cccf10f652122a17b40df9d672ccf2ed69cd82f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 10:13:31 -0400 Subject: [PATCH 04/12] drm/amdgpu: disable workload profile switching when OD is enabled Users have reported that they have to reduce the level of undervolting to acheive stability when dynamic workload profiles are enabled on GC 10.3.x. Disable dynamic workload profiles if the user has enabled OD. Fixes: b9467983b774 ("drm/amdgpu: add dynamic workload profile switching for gfx10") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4262 Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.15.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 ++++++++ drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 1 + 3 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1db1e6ec0184..c5646af055ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -2228,6 +2228,9 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) enum PP_SMC_POWER_PROFILE profile; int r; + if (amdgpu_dpm_is_overdrive_enabled(adev)) + return; + if (adev->gfx.num_gfx_rings) profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; else @@ -2258,6 +2261,11 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + + if (amdgpu_dpm_is_overdrive_enabled(adev)) + return; + atomic_dec(&ring->adev->gfx.total_submission_cnt); schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index d98c95d1ed83..5c1cbdc122d2 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -1697,6 +1697,28 @@ int amdgpu_dpm_is_overdrive_supported(struct amdgpu_device *adev) } } +int amdgpu_dpm_is_overdrive_enabled(struct amdgpu_device *adev) +{ + if (is_support_sw_smu(adev)) { + struct smu_context *smu = adev->powerplay.pp_handle; + + return smu->od_enabled; + } else { + struct pp_hwmgr *hwmgr; + + /* + * dpm on some legacy asics don't carry od_enabled member + * as its pp_handle is casted directly from adev. + */ + if (amdgpu_dpm_is_legacy_dpm(adev)) + return false; + + hwmgr = (struct pp_hwmgr *)adev->powerplay.pp_handle; + + return hwmgr->od_enabled; + } +} + int amdgpu_dpm_set_pp_table(struct amdgpu_device *adev, const char *buf, size_t size) diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index c0f9ecb97fcc..768317ee1486 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -563,6 +563,7 @@ int amdgpu_dpm_get_smu_prv_buf_details(struct amdgpu_device *adev, void **addr, size_t *size); int amdgpu_dpm_is_overdrive_supported(struct amdgpu_device *adev); +int amdgpu_dpm_is_overdrive_enabled(struct amdgpu_device *adev); int amdgpu_dpm_set_pp_table(struct amdgpu_device *adev, const char *buf, size_t size); From 98a46a408998102af5c45adce0871acd7967bb59 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 May 2025 19:05:58 +0300 Subject: [PATCH 05/12] drm/amdgpu: Fix integer overflow issues in amdgpu_userq_fence.c This patch only affects 32bit systems. There are several integer overflows bugs here but only the "sizeof(u32) * num_syncobj" multiplication is a problem at runtime. (The last lines of this patch). These variables are u32 variables that come from the user. The issue is the multiplications can overflow leading to us allocating a smaller buffer than intended. For the first couple integer overflows, the syncobj_handles = memdup_user() allocation is immediately followed by a kmalloc_array(): syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); In that situation the kmalloc_array() works as a bounds check and we haven't accessed the syncobj_handlesp[] array yet so the integer overflow is harmless. But the "num_syncobj" multiplication doesn't have that and the integer overflow could lead to an out of bounds access. Fixes: a292fdecd728 ("drm/amdgpu: Implement userqueue signal/wait IOCTL") Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index fc4d0d42e223..a86616c6deef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -430,7 +430,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, num_syncobj_handles = args->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), - sizeof(u32) * num_syncobj_handles); + size_mul(sizeof(u32), num_syncobj_handles)); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); @@ -612,13 +612,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, num_read_bo_handles = wait_info->num_bo_read_handles; bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), - sizeof(u32) * num_read_bo_handles); + size_mul(sizeof(u32), num_read_bo_handles)); if (IS_ERR(bo_handles_read)) return PTR_ERR(bo_handles_read); num_write_bo_handles = wait_info->num_bo_write_handles; bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles), - sizeof(u32) * num_write_bo_handles); + size_mul(sizeof(u32), num_write_bo_handles)); if (IS_ERR(bo_handles_write)) { r = PTR_ERR(bo_handles_write); goto free_bo_handles_read; @@ -626,7 +626,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, num_syncobj = wait_info->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles), - sizeof(u32) * num_syncobj); + size_mul(sizeof(u32), num_syncobj)); if (IS_ERR(syncobj_handles)) { r = PTR_ERR(syncobj_handles); goto free_bo_handles_write; From 335f1e797c32cbe6f313805125526b35d29280b0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 23 May 2025 19:09:52 +0300 Subject: [PATCH 06/12] drm/amdgpu: Fix integer overflow in amdgpu_gem_add_input_fence() The "num_syncobj_handles" is a u32 value that comes from the user via the ioctl. On 32bit systems the "sizeof(uint32_t) * num_syncobj_handles" multiplication can have an integer overflow. Use size_mul() to fix that. Fixes: 38c67ec9aa4b ("drm/amdgpu: Add input fence to sync bo map/unmap") Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 2c68118fe9fd..0ecc88df7208 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -58,7 +58,7 @@ amdgpu_gem_add_input_fence(struct drm_file *filp, return 0; syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array), - sizeof(uint32_t) * num_syncobj_handles); + size_mul(sizeof(uint32_t), num_syncobj_handles)); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); From e34bcf1594b59f9f63c084bf0646b19edf581adc Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Tue, 27 May 2025 19:13:20 +0530 Subject: [PATCH 07/12] drm/amdgpu: Add userq fence support to SDMAv7.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add userq fence support to SDMAv7.0. - GFX12's user fence irq src id differs from GFX11's, hence we need create a new irq srcid header file for GFX12. User fence irq src id information- GFX11 and SDMA6.0 - 0x43 GFX12 and SDMA7.0 - 0x46 Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 10 +-- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 59 ++++++++++----- .../include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h | 74 +++++++++++++++++++ 3 files changed, 120 insertions(+), 23 deletions(-) create mode 100644 drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f09d96bfee16..1234c8d64e20 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -36,7 +36,7 @@ #include "gc/gc_12_0_0_offset.h" #include "gc/gc_12_0_0_sh_mask.h" #include "soc24_enum.h" -#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" +#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h" #include "soc15.h" #include "clearstate_gfx12.h" @@ -1453,28 +1453,28 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, + GFX_12_0_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) return r; /* Bad opcode Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR, &adev->gfx.bad_op_irq); if (r) return r; /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, + GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, - GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, + GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT, &adev->gfx.priv_inst_irq); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index befe013b11a7..ad47d0bdf777 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -33,7 +33,7 @@ #include "gc/gc_12_0_0_offset.h" #include "gc/gc_12_0_0_sh_mask.h" #include "hdp/hdp_6_0_0_offset.h" -#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" +#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h" #include "soc15_common.h" #include "soc15.h" @@ -43,6 +43,7 @@ #include "sdma_v7_0.h" #include "v12_structs.h" #include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); @@ -910,6 +911,9 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + m->sdmax_rlcx_mcu_dbg0 = lower_32_bits(prop->fence_address); + m->sdmax_rlcx_mcu_dbg1 = upper_32_bits(prop->fence_address); + return 0; } @@ -1296,11 +1300,18 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, - GFX_11_0_0__SRCID__SDMA_TRAP, + GFX_12_0_0__SRCID__SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; + /* SDMA user fence event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, + GFX_12_0_0__SRCID__SDMA_FENCE, + &adev->sdma.fence_irq); + if (r) + return r; + for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1526,25 +1537,9 @@ static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instances, queue; - uint32_t mes_queue_id = entry->src_data[0]; DRM_DEBUG("IH: SDMA trap\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process smda queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); - return 0; - } - queue = entry->ring_id & 0xf; instances = (entry->ring_id & 0xf0) >> 4; if (instances > 1) { @@ -1566,6 +1561,29 @@ static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u32 doorbell_offset = entry->src_data[0]; + + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; + + doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); + } + + return 0; +} + static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1703,6 +1721,10 @@ static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = { .process = sdma_v7_0_process_trap_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v7_0_fence_irq_funcs = { + .process = sdma_v7_0_process_fence_irq, +}; + static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = { .process = sdma_v7_0_process_illegal_inst_irq, }; @@ -1712,6 +1734,7 @@ static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v7_0_trap_irq_funcs; + adev->sdma.fence_irq.funcs = &sdma_v7_0_fence_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h new file mode 100644 index 000000000000..467897ec2e65 --- /dev/null +++ b/drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __IRQSRCS_GFX_12_0_0_H__ +#define __IRQSRCS_GFX_12_0_0_H__ + +#define GFX_12_0_0__SRCID__UTCL2_FAULT 0 // UTCL2 has encountered a fault or retry scenario +#define GFX_12_0_0__SRCID__UTCL2_DATA_POISONING 1 // UTCL2 for data poisoning +#define GFX_12_0_0__SRCID__MEM_ACCES_MON 10 // 0x0A EA memory access monitor interrupt +#define GFX_12_0_0__SRCID__SDMA_ATOMIC_RTN_DONE 48 // 0x30 SDMA atomic*_rtn ops complete +#define GFX_12_0_0__SRCID__SDMA_TRAP 49 // 0x31 Trap +#define GFX_12_0_0__SRCID__SDMA_SRBMWRITE 50 // 0x32 SRBM write Protection +#define GFX_12_0_0__SRCID__SDMA_CTXEMPTY 51 // 0x33 Context Empty +#define GFX_12_0_0__SRCID__SDMA_PREEMPT 52 // 0x34 SDMA New Run List +#define GFX_12_0_0__SRCID__SDMA_IB_PREEMPT 53 // 0x35 sdma mid - command buffer preempt interrupt +#define GFX_12_0_0__SRCID__SDMA_DOORBELL_INVALID 54 // 0x36 Doorbell BE invalid +#define GFX_12_0_0__SRCID__SDMA_QUEUE_HANG 55 // 0x37 Queue hang or Command timeout +#define GFX_12_0_0__SRCID__SDMA_ATOMIC_TIMEOUT 56 // 0x38 SDMA atomic CMPSWAP loop timeout +#define GFX_12_0_0__SRCID__SDMA_POLL_TIMEOUT 57 // 0x39 SRBM read poll timeout +#define GFX_12_0_0__SRCID__SDMA_PAGE_TIMEOUT 58 // 0x3A Page retry timeout after UTCL2 return nack = 1 +#define GFX_12_0_0__SRCID__SDMA_PAGE_NULL 59 // 0x3B Page Null from UTCL2 when nack = 2 +#define GFX_12_0_0__SRCID__SDMA_PAGE_FAULT 60 // 0x3C Page Fault Error from UTCL2 when nack = 3 +#define GFX_12_0_0__SRCID__SDMA_VM_HOLE 61 // 0x3D MC or SEM address in VM hole +#define GFX_12_0_0__SRCID__SDMA_ECC 62 // 0x3E ECC Error +#define GFX_12_0_0__SRCID__SDMA_FROZEN 63 // 0x3F SDMA Frozen +#define GFX_12_0_0__SRCID__SDMA_SRAM_ECC 64 // 0x40 SRAM ECC Error +#define GFX_12_0_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT 65 // 0x41 GPF(Sem incomplete timeout) +#define GFX_12_0_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT 66 // 0x42 Semaphore wait fail timeout +#define GFX_12_0_0__SRCID__SDMA_FENCE 70 // 0x46 User fence +#define GFX_12_0_0__SRCID__RLC_GC_FED_INTERRUPT 128 // 0x80 FED Interrupt (for data poisoning) +#define GFX_12_0_0__SRCID__CP_GENERIC_INT 177 // 0xB1 CP_GENERIC int +#define GFX_12_0_0__SRCID__CP_PM4_PKT_RSVD_BIT_ERROR 180 // 0xB4 PM4 Pkt Rsvd Bits Error +#define GFX_12_0_0__SRCID__CP_EOP_INTERRUPT 181 // 0xB5 End-of-Pipe Interrupt +#define GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR 183 // 0xB7 Bad Opcode Error +#define GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT 184 // 0xB8 Privileged Register Fault +#define GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT 185 // 0xB9 Privileged Instr Fault +#define GFX_12_0_0__SRCID__CP_WAIT_MEM_SEM_FAULT 186 // 0xBA Wait Memory Semaphore Fault (Sync Object Fault) +#define GFX_12_0_0__SRCID__CP_CTX_EMPTY_INTERRUPT 187 // 0xBB Context Empty Interrupt +#define GFX_12_0_0__SRCID__CP_CTX_BUSY_INTERRUPT 188 // 0xBC Context Busy Interrupt +#define GFX_12_0_0__SRCID__CP_ME_WAIT_REG_MEM_POLL_TIMEOUT 192 // 0xC0 CP.ME Wait_Reg_Mem Poll Timeout +#define GFX_12_0_0__SRCID__CP_SIG_INCOMPLETE 193 // 0xC1 "Surface Probe Fault Signal Incomplete" +#define GFX_12_0_0__SRCID__CP_PREEMPT_ACK 194 // 0xC2 Preemption Ack-wledge +#define GFX_12_0_0__SRCID__CP_GPF 195 // 0xC3 General Protection Fault (GPF) +#define GFX_12_0_0__SRCID__CP_GDS_ALLOC_ERROR 196 // 0xC4 GDS Alloc Error +#define GFX_12_0_0__SRCID__CP_ECC_ERROR 197 // 0xC5 ECC Error +#define GFX_12_0_0__SRCID__CP_COMPUTE_QUERY_STATUS 199 // 0xC7 Compute query status +#define GFX_12_0_0__SRCID__CP_VM_DOORBELL 200 // 0xC8 Unattached VM Doorbell Received +#define GFX_12_0_0__SRCID__CP_FUE_ERROR 201 // 0xC9 ECC FUE Error +#define GFX_12_0_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT 202 // 0xCA Streaming Perf Monitor Interrupt +#define GFX_12_0_0__SRCID__GRBM_RD_TIMEOUT_ERROR 232 // 0xE8 CRead timeout error +#define GFX_12_0_0__SRCID__GRBM_REG_GUI_IDLE 233 // 0xE9 Register GUI Idle +#define GFX_12_0_0__SRCID__SQ_INTERRUPT_ID 239 // 0xEF SQ Interrupt (ttrace wrap, errors) + +#endif From 893f07452bca56ff146a6be02b3294a9ea23d18a Mon Sep 17 00:00:00 2001 From: Zhongwei Zhang Date: Tue, 13 May 2025 16:45:59 +0800 Subject: [PATCH 08/12] drm/amd/display: Correct non-OLED pre_T11_delay. [Why] Only OLED panels require non-zero pre_T11_delay defaultly. Others should be controlled by power sequence. [How] For non OLED, pre_T11_delay delay in code should be zero. Also post_T7_delay. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Signed-off-by: Zhongwei Zhang Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 23bec5d25ed6..b88b2d6b4e81 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -952,8 +952,8 @@ void dce110_edp_backlight_control( struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; uint8_t pwrseq_instance = 0; - unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; - unsigned int post_T7_delay = OLED_POST_T7_DELAY; + unsigned int pre_T11_delay = (link->dpcd_sink_ext_caps.bits.oled ? OLED_PRE_T11_DELAY : 0); + unsigned int post_T7_delay = (link->dpcd_sink_ext_caps.bits.oled ? OLED_POST_T7_DELAY : 0); if (dal_graphics_object_id_get_connector_id(link->link_enc->connector) != CONNECTOR_ID_EDP) { @@ -1069,7 +1069,8 @@ void dce110_edp_backlight_control( if (!enable) { /*follow oem panel config's requirement*/ pre_T11_delay += link->panel_config.pps.extra_pre_t11_ms; - msleep(pre_T11_delay); + if (pre_T11_delay) + msleep(pre_T11_delay); } } From 747bfca45e07b201cf80f3ba7338006f4525aeed Mon Sep 17 00:00:00 2001 From: Zhongwei Zhang Date: Fri, 16 May 2025 14:44:21 +0800 Subject: [PATCH 09/12] drm/amd/display: Avoid calling blank_stream() twice [Why] We've made fix for garbage in dcn31_reset_back_end_for_pipe(), adding blank_stream() before disable_crtc(). And set_dpms_off() will call blank_stream() again. [How] Add flag to avoid calling blank_stream() twice. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Zhongwei Zhang Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 +++ .../gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 11 +++++++++-- .../drm/amd/display/dc/hwss/hw_sequencer_private.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index b88b2d6b4e81..e8730cc40edb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1221,6 +1221,9 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) struct dc_link *link = stream->link; struct dce_hwseq *hws = link->dc->hwseq; + if (hws && hws->wa_state.skip_blank_stream) + return; + if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { if (!link->skip_implict_edp_power_control) hws->funcs.edp_backlight_control(link, false); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index f38340aa3f15..5ba3999991b0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -526,9 +526,15 @@ static void dcn31_reset_back_end_for_pipe( link = pipe_ctx->stream->link; + if (dc->hwseq) + dc->hwseq->wa_state.skip_blank_stream = false; + if ((!pipe_ctx->stream->dpms_off || link->link_status.link_active) && - (link->connector_signal == SIGNAL_TYPE_EDP)) + (link->connector_signal == SIGNAL_TYPE_EDP)) { dc->hwss.blank_stream(pipe_ctx); + if (dc->hwseq) + dc->hwseq->wa_state.skip_blank_stream = true; + } pipe_ctx->stream_res.tg->funcs->set_dsc_config( pipe_ctx->stream_res.tg, @@ -570,7 +576,8 @@ static void dcn31_reset_back_end_for_pipe( pipe_ctx->stream_res.audio = NULL; } } - + if (dc->hwseq) + dc->hwseq->wa_state.skip_blank_stream = false; pipe_ctx->stream = NULL; DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n", pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 09bc65c2fa23..1e2d247fbbac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -49,6 +49,7 @@ struct hwseq_wa_state { bool DEGVIDCN10_253_applied; bool disallow_self_refresh_during_multi_plane_transition_applied; unsigned int disallow_self_refresh_during_multi_plane_transition_applied_on_frame; + bool skip_blank_stream; }; struct pipe_ctx; From c73375d918452e58f8903685d30ae21603040709 Mon Sep 17 00:00:00 2001 From: Cruise Hung Date: Thu, 22 May 2025 18:02:14 +0800 Subject: [PATCH 10/12] drm/amd/display: Use DC log instead of using DM error msg [Why & How] It sent an error msg when it failed to read the DP tunneling DPCD field. This should just be a warning msg. Use a DC log instead of a DM error msg. Reviewed-by: Wenjing Liu Signed-off-by: Cruise Hung Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 8f79881ad9f1..a5127c2d47ef 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -2023,7 +2023,7 @@ static bool retrieve_link_cap(struct dc_link *link) /* Read DP tunneling information. */ status = dpcd_get_tunneling_device_data(link); if (status != DC_OK) - dm_error("%s: Read DP tunneling device data failed.\n", __func__); + DC_LOG_DP2("%s: Read DP tunneling device data failed.\n", __func__); retrieve_cable_id(link); dpcd_write_cable_id_to_dprx(link); From 4b61b8a390511a1864f26cc42bab72881e93468d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 14 May 2025 16:00:43 -0500 Subject: [PATCH 11/12] drm/amd/display: Add debugging message for brightness caps [Why] Default BIOS brightness caps are buried in ACPI. [How] Add extra dynamic debug that can show default brightness caps. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1797fa85fac6..588f6afb1447 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4910,6 +4910,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) struct backlight_properties props = { 0 }; struct amdgpu_dm_backlight_caps caps = { 0 }; char bl_name[16]; + int min, max; if (aconnector->bl_idx == -1) return; @@ -4922,11 +4923,15 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) } amdgpu_acpi_get_backlight_caps(&caps); - if (caps.caps_valid) { + if (caps.caps_valid && get_brightness_range(&caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) props.brightness = caps.ac_level; else props.brightness = caps.dc_level; + /* min is zero, so max needs to be adjusted */ + props.max_brightness = max - min; + drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, + caps.ac_level, caps.dc_level); } else props.brightness = AMDGPU_MAX_BL_LEVEL; From 8b5f3a229a70d242322b78c8e13744ca00212def Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 14 May 2025 16:06:40 -0500 Subject: [PATCH 12/12] drm/amd/display: Fix default DC and AC levels [Why] DC and AC levels are advertised in a percentage, not a luminance. [How] Scale DC and AC levels to supported values. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4221 Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 588f6afb1447..d3100f641ac6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4925,9 +4925,9 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) amdgpu_acpi_get_backlight_caps(&caps); if (caps.caps_valid && get_brightness_range(&caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) - props.brightness = caps.ac_level; + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.ac_level, 100); else - props.brightness = caps.dc_level; + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.dc_level, 100); /* min is zero, so max needs to be adjusted */ props.max_brightness = max - min; drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max,