amd-drm-fixes-6.15-2025-04-09:

amdgpu:
 - MES FW version caching fixes
 - Only use GTT as a fallback if we already have a backing store
 - dma_buf fix
 - IP discovery fix
 - Replay and PSR with VRR fix
 - DC FP fixes
 - eDP fixes
 - KIQ TLB invalidate fix
 - Enable dmem groups support
 - Allow pinning VRAM dma bufs if imports can do P2P
 - Workload profile fixes
 - Prevent possible division by 0 in fan handling
 
 amdkfd:
 - Queue reset fixes
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZ/akUAAKCRC93/aFa7yZ
 2HtpAP9ptJj5RKxOcdh8lbiOZN3RGjxsqw6wXk7LUnw3A2npVAD5AXoaTis4S84e
 0NIY7bd1Q1njP2akA4O5AKykkpgVtgw=
 =Ha4H
 -----END PGP SIGNATURE-----

Merge tag 'amd-drm-fixes-6.15-2025-04-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.15-2025-04-09:

amdgpu:
- MES FW version caching fixes
- Only use GTT as a fallback if we already have a backing store
- dma_buf fix
- IP discovery fix
- Replay and PSR with VRR fix
- DC FP fixes
- eDP fixes
- KIQ TLB invalidate fix
- Enable dmem groups support
- Allow pinning VRAM dma bufs if imports can do P2P
- Workload profile fixes
- Prevent possible division by 0 in fan handling

amdkfd:
- Queue reset fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20250409165238.1180153-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2025-04-10 17:04:33 +10:00
commit 47271a0cae
20 changed files with 187 additions and 36 deletions

View File

@ -353,7 +353,6 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000

View File

@ -3643,6 +3643,13 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev, adev->ip_blocks[i].version->type))
continue;
/* Since we skip suspend for S0i3, we need to cancel the delayed
* idle work here as the suspend callback never gets called.
*/
if (adev->in_s0ix &&
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
cancel_delayed_work_sync(&adev->gfx.idle_work);
/* skip suspend of gfx/mes and psp for S0ix
* gfx is in gfxoff state, so on resume it will exit gfxoff just
* like at runtime. PSP is also part of the always on hardware

View File

@ -120,6 +120,8 @@ MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3

View File

@ -75,11 +75,25 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
*/
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = attach->dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct dma_buf *dmabuf = attach->dmabuf;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
u32 domains = bo->preferred_domains;
/* pin buffer into GTT */
return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
dma_resv_assert_held(dmabuf->resv);
/*
* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
* support if all attachments can do P2P. If any attachment can't do
* P2P just pin into GTT instead.
*/
list_for_each_entry(attach, &dmabuf->attachments, node)
if (!attach->peer2peer)
domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
if (domains & AMDGPU_GEM_DOMAIN_VRAM)
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
return amdgpu_bo_pin(bo, domains);
}
/**
@ -134,9 +148,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return ERR_PTR(r);
} else if (bo->tbo.resource->mem_type != TTM_PL_TT) {
return ERR_PTR(-EBUSY);
}
switch (bo->tbo.resource->mem_type) {
@ -184,7 +195,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
if (sgt->sgl->page_link) {
if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);

View File

@ -699,12 +699,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
uint32_t flush_type, bool all_hub,
uint32_t inst)
{
u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
unsigned int ndw;
int r;
int r, cnt = 0;
uint32_t seq;
/*
@ -761,10 +759,21 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
!amdgpu_reset_pending(adev->reset_domain)) {
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
if (cnt > MAX_KIQ_REG_TRY) {
dev_err(adev->dev, "timeout waiting for kiq fence\n");
r = -ETIME;
}
} else
r = 0;
}
error_unlock_reset:

View File

@ -163,8 +163,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
* When GTT is just an alternative to VRAM make sure that we
* only use it as fallback and still try to fill up VRAM first.
*/
if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
!(adev->flags & AMD_IS_APU))
if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) &&
domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
places[c].flags |= TTM_PL_FLAG_FALLBACK;
c++;
}

View File

@ -24,6 +24,7 @@
#include <linux/dma-mapping.h>
#include <drm/ttm/ttm_range_manager.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@ -907,6 +908,9 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
struct ttm_resource_manager *man = &mgr->manager;
int err;
man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size);
if (IS_ERR(man->cg))
return PTR_ERR(man->cg);
ttm_resource_manager_init(man, &adev->mman.bdev,
adev->gmc.real_vram_size);

View File

@ -894,6 +894,10 @@ static void mes_v11_0_get_fw_version(struct amdgpu_device *adev)
{
int pipe;
/* return early if we have already fetched these */
if (adev->mes.sched_version && adev->mes.kiq_version)
return;
/* get MES scheduler/KIQ versions */
mutex_lock(&adev->srbm_mutex);

View File

@ -1392,17 +1392,20 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
mes_v12_0_queue_init_register(ring);
}
/* get MES scheduler/KIQ versions */
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, 3, pipe, 0, 0);
if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
/* get MES scheduler/KIQ versions */
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, 3, pipe, 0, 0);
if (pipe == AMDGPU_MES_SCHED_PIPE)
adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
if (pipe == AMDGPU_MES_SCHED_PIPE)
adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
return 0;
}

View File

@ -1983,9 +1983,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4))
@ -2001,7 +1998,11 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
if (!amdgpu_sriov_vf(dev->gpu->adev))
dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;

View File

@ -1722,6 +1722,13 @@ static const struct dmi_system_id dmi_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"),
},
},
{
.callback = edp0_on_dp1_callback,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"),
},
},
{
.callback = edp0_on_dp1_callback,
.matches = {
@ -1729,6 +1736,20 @@ static const struct dmi_system_id dmi_quirk_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"),
},
},
{
.callback = edp0_on_dp1_callback,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"),
},
},
{
.callback = edp0_on_dp1_callback,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"),
},
},
{}
/* TODO: refactor this from a fixed table to a dynamic option */
};

View File

@ -113,6 +113,7 @@ bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state)
*
* Panel Replay and PSR SU
* - Enable when:
* - VRR is disabled
* - vblank counter is disabled
* - entry is allowed: usermode demonstrates an adequate number of fast
* commits)
@ -131,19 +132,20 @@ static void amdgpu_dm_crtc_set_panel_sr_feature(
bool is_sr_active = (link->replay_settings.replay_allow_active ||
link->psr_settings.psr_allow_active);
bool is_crc_window_active = false;
bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(vblank_work->acrtc);
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
is_crc_window_active =
amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base);
#endif
if (link->replay_settings.replay_feature_enabled &&
if (link->replay_settings.replay_feature_enabled && !vrr_active &&
allow_sr_entry && !is_sr_active && !is_crc_window_active) {
amdgpu_dm_replay_enable(vblank_work->stream, true);
} else if (vblank_enabled) {
if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active)
amdgpu_dm_psr_disable(vblank_work->stream, false);
} else if (link->psr_settings.psr_feature_enabled &&
} else if (link->psr_settings.psr_feature_enabled && !vrr_active &&
allow_sr_entry && !is_sr_active && !is_crc_window_active) {
struct amdgpu_dm_connector *aconn =
@ -244,6 +246,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
struct vblank_control_work *vblank_work =
container_of(work, struct vblank_control_work, work);
struct amdgpu_display_manager *dm = vblank_work->dm;
struct amdgpu_device *adev = drm_to_adev(dm->ddev);
int r;
mutex_lock(&dm->dc_lock);
@ -271,8 +275,15 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
vblank_work->acrtc->dm_irq_params.allow_sr_entry);
}
if (dm->active_vblank_irq_count == 0)
if (dm->active_vblank_irq_count == 0) {
r = amdgpu_dpm_pause_power_profile(adev, true);
if (r)
dev_warn(adev->dev, "failed to set default power profile mode\n");
dc_allow_idle_optimizations(dm->dc, true);
r = amdgpu_dpm_pause_power_profile(adev, false);
if (r)
dev_warn(adev->dev, "failed to restore the power profile mode\n");
}
mutex_unlock(&dm->dc_lock);

View File

@ -86,6 +86,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co
/* Store configuration options */
(*dml_ctx)->config = *config;
DC_FP_START();
/*Initialize SOCBB and DCNIP params */
dml21_initialize_soc_bb_params(&(*dml_ctx)->v21.dml_init, config, in_dc);
dml21_initialize_ip_params(&(*dml_ctx)->v21.dml_init, config, in_dc);
@ -96,6 +98,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co
/*Initialize DML21 instance */
dml2_initialize_instance(&(*dml_ctx)->v21.dml_init);
DC_FP_END();
}
bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config)
@ -283,11 +287,16 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml
{
bool out = false;
DC_FP_START();
/* Use dml_validate_only for fast_validate path */
if (fast_validate) {
if (fast_validate)
out = dml21_check_mode_support(in_dc, context, dml_ctx);
} else
else
out = dml21_mode_check_and_programming(in_dc, context, dml_ctx);
DC_FP_END();
return out;
}
@ -426,8 +435,12 @@ void dml21_copy(struct dml2_context *dst_dml_ctx,
dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming;
DC_FP_START();
/* need to initialize copied instance for internal references to be correct */
dml2_initialize_instance(&dst_dml_ctx->v21.dml_init);
DC_FP_END();
}
bool dml21_create_copy(struct dml2_context **dst_dml_ctx,

View File

@ -732,11 +732,16 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2
return out;
}
DC_FP_START();
/* Use dml_validate_only for fast_validate path */
if (fast_validate)
out = dml2_validate_only(context);
else
out = dml2_validate_and_build_resource(in_dc, context);
DC_FP_END();
return out;
}
@ -779,11 +784,15 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op
break;
}
DC_FP_START();
initialize_dml2_ip_params(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.ip);
initialize_dml2_soc_bbox(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc);
initialize_dml2_soc_states(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc, &(*dml2)->v20.dml_core_ctx.states);
DC_FP_END();
}
bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)

View File

@ -429,6 +429,7 @@ struct amd_pm_funcs {
int (*set_pp_table)(void *handle, const char *buf, size_t size);
void (*debugfs_print_current_performance_level)(void *handle, struct seq_file *m);
int (*switch_power_profile)(void *handle, enum PP_SMC_POWER_PROFILE type, bool en);
int (*pause_power_profile)(void *handle, bool pause);
/* export to amdgpu */
struct amd_vce_state *(*get_vce_clock_state)(void *handle, u32 idx);
int (*dispatch_tasks)(void *handle, enum amd_pp_task task_id,

View File

@ -349,6 +349,25 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
return ret;
}
int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev,
bool pause)
{
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
int ret = 0;
if (amdgpu_sriov_vf(adev))
return 0;
if (pp_funcs && pp_funcs->pause_power_profile) {
mutex_lock(&adev->pm.mutex);
ret = pp_funcs->pause_power_profile(
adev->powerplay.pp_handle, pause);
mutex_unlock(&adev->pm.mutex);
}
return ret;
}
int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
uint32_t pstate)
{

View File

@ -410,6 +410,8 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
enum PP_SMC_POWER_PROFILE type,
bool en);
int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev,
bool pause);
int amdgpu_dpm_baco_reset(struct amdgpu_device *adev);

View File

@ -2398,7 +2398,11 @@ static int smu_switch_power_profile(void *handle,
smu_power_profile_mode_get(smu, type);
else
smu_power_profile_mode_put(smu, type);
ret = smu_bump_power_profile_mode(smu, NULL, 0);
/* don't switch the active workload when paused */
if (smu->pause_workload)
ret = 0;
else
ret = smu_bump_power_profile_mode(smu, NULL, 0);
if (ret) {
if (enable)
smu_power_profile_mode_put(smu, type);
@ -2411,6 +2415,35 @@ static int smu_switch_power_profile(void *handle,
return 0;
}
static int smu_pause_power_profile(void *handle,
bool pause)
{
struct smu_context *smu = handle;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
u32 workload_mask = 1 << PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
int ret;
if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
return -EOPNOTSUPP;
if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
smu->pause_workload = pause;
/* force to bootup default profile */
if (smu->pause_workload && smu->ppt_funcs->set_power_profile_mode)
ret = smu->ppt_funcs->set_power_profile_mode(smu,
workload_mask,
NULL,
0);
else
ret = smu_bump_power_profile_mode(smu, NULL, 0);
return ret;
}
return 0;
}
static enum amd_dpm_forced_level smu_get_performance_level(void *handle)
{
struct smu_context *smu = handle;
@ -3733,6 +3766,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.get_pp_table = smu_sys_get_pp_table,
.set_pp_table = smu_sys_set_pp_table,
.switch_power_profile = smu_switch_power_profile,
.pause_power_profile = smu_pause_power_profile,
/* export to amdgpu */
.dispatch_tasks = smu_handle_dpm_task,
.load_firmware = smu_load_microcode,

View File

@ -558,6 +558,7 @@ struct smu_context {
/* asic agnostic workload mask */
uint32_t workload_mask;
bool pause_workload;
/* default/user workload preference */
uint32_t power_profile_mode;
uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT];

View File

@ -1204,7 +1204,7 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
uint32_t crystal_clock_freq = 2500;
uint32_t tach_period;
if (speed == 0)
if (!speed || speed > UINT_MAX/8)
return -EINVAL;
/*
* To prevent from possible overheat, some ASICs may have requirement