From d6a57588666301acd9d42d3b00d74240964f07f6 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 1 Dec 2023 08:38:15 +0800 Subject: [PATCH 01/14] drm/amdgpu: disable MCBP by default Disable MCBP(mid command buffer preemption) by default as old Mesa hangs with it. We shall not enable the feature that breaks old usermode driver. Fixes: 50a7c8765ca6 ("drm/amdgpu: enable mcbp by default on gfx9") Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c0817cbc7c2..1cc1ae2743c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3791,10 +3791,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; else if (amdgpu_mcbp == 0) adev->gfx.mcbp = false; - else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) && - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) - adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; From fec05adc40c25a028c9dfa9d540f800a2d433f80 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 1 Dec 2023 06:25:07 -0700 Subject: [PATCH 02/14] drm/amd/display: Use channel_width = 2 for vram table 3.0 VBIOS has suggested to use channel_width=2 for any ASIC that uses vram info 3.0. This is because channel_width in the vram table no longer represents the memory width Tested-by: Daniel Wheeler Reviewed-by: Samson Tam Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 7cdb1a8a0ba0..6a96810a477e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2386,7 +2386,13 @@ static enum bp_result get_vram_info_v30( return BP_RESULT_BADBIOSTABLE; info->num_chans = info_v30->channel_num; - info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; + /* As suggested by VBIOS we should always use + * dram_channel_width_bytes = 2 when using VRAM + * table version 3.0. This is because the channel_width + * param in the VRAM info table is changed in 7000 series and + * no longer represents the memory channel width. + */ + info->dram_channel_width_bytes = 2; return result; } From 3d71a8726e05a35beb9de394e86ce896d69e563f Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Fri, 1 Dec 2023 06:25:16 -0700 Subject: [PATCH 03/14] drm/amd/display: Add monitor patch for specific eDP [WHY] Some eDP panels's ext caps don't write initial value cause the value of dpcd_addr(0x317) is random. It means that sometimes the eDP will clarify it is OLED, miniLED...etc cause the backlight control interface is incorrect. [HOW] Add a new panel patch to remove sink ext caps(HDR,OLED...etc) Tested-by: Daniel Wheeler Reviewed-by: Sun peng Li Acked-by: Rodrigo Siqueira Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index c7a29bb737e2..aac98f93545a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -63,6 +63,12 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_fams = true; break; + /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ + case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): + case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.remove_sink_ext_caps = true; + break; default: return; } From 9f7cb03e3c32613fb5891e10ce3ff9169b09ba69 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Fri, 1 Dec 2023 06:25:40 -0700 Subject: [PATCH 04/14] drm/amd/display: Fix array-index-out-of-bounds in dml2 [Why] UBSAN errors observed in dmesg. array-index-out-of-bounds in dml2/display_mode_core.c [How] Fix the index. Tested-by: Daniel Wheeler Acked-by: Rodrigo Siqueira Signed-off-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 59718ee33e51..4d1336e5afc2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -9447,12 +9447,12 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc // Output CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j]; + CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0]; CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[] CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j]; + CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0]; CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j]; + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0]; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( &mode_lib->scratch, From 78825df90d427b26964bf9610eaac30542ee9e2d Mon Sep 17 00:00:00 2001 From: Li Ma Date: Tue, 14 Nov 2023 16:17:51 +0800 Subject: [PATCH 05/14] drm/amd/swsmu: update smu v14_0_0 driver if version and metrics table Increment the driver if version and add new mems to the mertics table. Signed-off-by: Li Ma Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/kgd_pp_interface.h | 17 ++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 10 +++ .../inc/pmfw_if/smu14_driver_if_v14_0_0.h | 77 +++++++++++-------- .../drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c | 46 ++++++++++- 4 files changed, 115 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 0d1209f2cf31..1c5049e894e3 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -1085,6 +1085,10 @@ struct gpu_metrics_v3_0 { uint16_t average_dram_reads; /* time filtered DRAM write bandwidth [MB/sec] */ uint16_t average_dram_writes; + /* time filtered IPU read bandwidth [MB/sec] */ + uint16_t average_ipu_reads; + /* time filtered IPU write bandwidth [MB/sec] */ + uint16_t average_ipu_writes; /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; @@ -1104,6 +1108,8 @@ struct gpu_metrics_v3_0 { uint32_t average_all_core_power; /* calculated core power [mW] */ uint16_t average_core_power[16]; + /* time filtered total system power [mW] */ + uint16_t average_sys_power; /* maximum IRM defined STAPM power limit [mW] */ uint16_t stapm_power_limit; /* time filtered STAPM power limit [mW] */ @@ -1116,6 +1122,8 @@ struct gpu_metrics_v3_0 { uint16_t average_ipuclk_frequency; uint16_t average_fclk_frequency; uint16_t average_vclk_frequency; + uint16_t average_uclk_frequency; + uint16_t average_mpipu_frequency; /* Current clocks */ /* target core frequency [MHz] */ @@ -1125,6 +1133,15 @@ struct gpu_metrics_v3_0 { /* GFXCLK frequency limit enforced on GFX [MHz] */ uint16_t current_gfx_maxfreq; + /* Throttle Residency (ASIC dependent) */ + uint32_t throttle_residency_prochot; + uint32_t throttle_residency_spl; + uint32_t throttle_residency_fppt; + uint32_t throttle_residency_sppt; + uint32_t throttle_residency_thm_core; + uint32_t throttle_residency_thm_gfx; + uint32_t throttle_residency_thm_soc; + /* Metrics table alpha filter time constant [us] */ uint32_t time_filter_alphavalue; }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 23fa71cafb14..f8b2e6cc2568 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1408,6 +1408,16 @@ typedef enum { METRICS_PCIE_WIDTH, METRICS_CURR_FANPWM, METRICS_CURR_SOCKETPOWER, + METRICS_AVERAGE_VPECLK, + METRICS_AVERAGE_IPUCLK, + METRICS_AVERAGE_MPIPUCLK, + METRICS_THROTTLER_RESIDENCY_PROCHOT, + METRICS_THROTTLER_RESIDENCY_SPL, + METRICS_THROTTLER_RESIDENCY_FPPT, + METRICS_THROTTLER_RESIDENCY_SPPT, + METRICS_THROTTLER_RESIDENCY_THM_CORE, + METRICS_THROTTLER_RESIDENCY_THM_GFX, + METRICS_THROTTLER_RESIDENCY_THM_SOC, } MetricsMember_t; enum smu_cmn2asic_mapping_type { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 22f88842a7fd..8f42771e1f0a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 6 +#define PMFW_DRIVER_IF_VERSION 7 typedef struct { int32_t value; @@ -150,37 +150,50 @@ typedef struct { } DpmClocks_t; typedef struct { - uint16_t CoreFrequency[16]; //Target core frequency [MHz] - uint16_t CorePower[16]; //CAC calculated core power [mW] - uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] - uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] - uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] - uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] - uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] - uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] - uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] - uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] - uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] - uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] - uint16_t GfxActivity; //Time filtered GFX busy % [0-100] - uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] - uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] - uint16_t VcnActivity; //Time filtered VCN busy % [0-100] - uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] - uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] - uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] - uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] - uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] - uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] - uint16_t IpuPower; //Time filtered IPU power [mW] - uint32_t ApuPower; //Time filtered APU power [mW] - uint32_t GfxPower; //Time filtered GFX power [mW] - uint32_t dGpuPower; //Time filtered dGPU power [mW] - uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] - uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] - uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] - uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] - uint32_t spare[16]; + uint16_t CoreFrequency[16]; //Target core frequency [MHz] + uint16_t CorePower[16]; //CAC calculated core power [mW] + uint16_t CoreTemperature[16]; //TSEN measured core temperature [centi-C] + uint16_t GfxTemperature; //TSEN measured GFX temperature [centi-C] + uint16_t SocTemperature; //TSEN measured SOC temperature [centi-C] + uint16_t StapmOpnLimit; //Maximum IRM defined STAPM power limit [mW] + uint16_t StapmCurrentLimit; //Time filtered STAPM power limit [mW] + uint16_t InfrastructureCpuMaxFreq; //CCLK frequency limit enforced on classic cores [MHz] + uint16_t InfrastructureGfxMaxFreq; //GFXCLK frequency limit enforced on GFX [MHz] + uint16_t SkinTemp; //Maximum skin temperature reported by APU and HS2 chassis sensors [centi-C] + uint16_t GfxclkFrequency; //Time filtered target GFXCLK frequency [MHz] + uint16_t FclkFrequency; //Time filtered target FCLK frequency [MHz] + uint16_t GfxActivity; //Time filtered GFX busy % [0-100] + uint16_t SocclkFrequency; //Time filtered target SOCCLK frequency [MHz] + uint16_t VclkFrequency; //Time filtered target VCLK frequency [MHz] + uint16_t VcnActivity; //Time filtered VCN busy % [0-100] + uint16_t VpeclkFrequency; //Time filtered target VPECLK frequency [MHz] + uint16_t IpuclkFrequency; //Time filtered target IPUCLK frequency [MHz] + uint16_t IpuBusy[8]; //Time filtered IPU per-column busy % [0-100] + uint16_t DRAMReads; //Time filtered DRAM read bandwidth [MB/sec] + uint16_t DRAMWrites; //Time filtered DRAM write bandwidth [MB/sec] + uint16_t CoreC0Residency[16]; //Time filtered per-core C0 residency % [0-100] + uint16_t IpuPower; //Time filtered IPU power [mW] + uint32_t ApuPower; //Time filtered APU power [mW] + uint32_t GfxPower; //Time filtered GFX power [mW] + uint32_t dGpuPower; //Time filtered dGPU power [mW] + uint32_t SocketPower; //Time filtered power used for PPT/STAPM [APU+dGPU] [mW] + uint32_t AllCorePower; //Time filtered sum of core power across all cores in the socket [mW] + uint32_t FilterAlphaValue; //Metrics table alpha filter time constant [us] + uint32_t MetricsCounter; //Counter that is incremented on every metrics table update [PM_TIMER cycles] + uint16_t MemclkFrequency; //Time filtered target MEMCLK frequency [MHz] + uint16_t MpipuclkFrequency; //Time filtered target MPIPUCLK frequency [MHz] + uint16_t IpuReads; //Time filtered IPU read bandwidth [MB/sec] + uint16_t IpuWrites; //Time filtered IPU write bandwidth [MB/sec] + uint32_t ThrottleResidency_PROCHOT; //Counter that is incremented on every metrics table update when PROCHOT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPL; //Counter that is incremented on every metrics table update when SPL was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_FPPT; //Counter that is incremented on every metrics table update when fast PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_SPPT; //Counter that is incremented on every metrics table update when slow PPT was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_CORE; //Counter that is incremented on every metrics table update when CORE thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_GFX; //Counter that is incremented on every metrics table update when GFX thermal throttling was engaged [PM_TIMER cycles] + uint32_t ThrottleResidency_THM_SOC; //Counter that is incremented on every metrics table update when SOC thermal throttling was engaged [PM_TIMER cycles] + uint16_t Psys; //Time filtered Psys power [mW] + uint16_t spare1; + uint32_t spare[6]; } SmuMetrics_t; //ISP tile definitions diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 03b38c3a9968..94ccdbfd7090 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -246,11 +246,20 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = 0; break; case METRICS_AVERAGE_UCLK: - *value = 0; + *value = metrics->MemclkFrequency; break; case METRICS_AVERAGE_FCLK: *value = metrics->FclkFrequency; break; + case METRICS_AVERAGE_VPECLK: + *value = metrics->VpeclkFrequency; + break; + case METRICS_AVERAGE_IPUCLK: + *value = metrics->IpuclkFrequency; + break; + case METRICS_AVERAGE_MPIPUCLK: + *value = metrics->MpipuclkFrequency; + break; case METRICS_AVERAGE_GFXACTIVITY: *value = metrics->GfxActivity / 100; break; @@ -270,8 +279,26 @@ static int smu_v14_0_0_get_smu_metrics_data(struct smu_context *smu, *value = metrics->SocTemperature / 100 * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; - case METRICS_THROTTLER_STATUS: - *value = 0; + case METRICS_THROTTLER_RESIDENCY_PROCHOT: + *value = metrics->ThrottleResidency_PROCHOT; + break; + case METRICS_THROTTLER_RESIDENCY_SPL: + *value = metrics->ThrottleResidency_SPL; + break; + case METRICS_THROTTLER_RESIDENCY_FPPT: + *value = metrics->ThrottleResidency_FPPT; + break; + case METRICS_THROTTLER_RESIDENCY_SPPT: + *value = metrics->ThrottleResidency_SPPT; + break; + case METRICS_THROTTLER_RESIDENCY_THM_CORE: + *value = metrics->ThrottleResidency_THM_CORE; + break; + case METRICS_THROTTLER_RESIDENCY_THM_GFX: + *value = metrics->ThrottleResidency_THM_GFX; + break; + case METRICS_THROTTLER_RESIDENCY_THM_SOC: + *value = metrics->ThrottleResidency_THM_SOC; break; case METRICS_VOLTAGE_VDDGFX: *value = 0; @@ -498,6 +525,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, sizeof(uint16_t) * 16); gpu_metrics->average_dram_reads = metrics.DRAMReads; gpu_metrics->average_dram_writes = metrics.DRAMWrites; + gpu_metrics->average_ipu_reads = metrics.IpuReads; + gpu_metrics->average_ipu_writes = metrics.IpuWrites; gpu_metrics->average_socket_power = metrics.SocketPower; gpu_metrics->average_ipu_power = metrics.IpuPower; @@ -505,6 +534,7 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_power = metrics.GfxPower; gpu_metrics->average_dgpu_power = metrics.dGpuPower; gpu_metrics->average_all_core_power = metrics.AllCorePower; + gpu_metrics->average_sys_power = metrics.Psys; memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], sizeof(uint16_t) * 16); @@ -515,6 +545,8 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_fclk_frequency = metrics.FclkFrequency; gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; gpu_metrics->average_ipuclk_frequency = metrics.IpuclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_mpipu_frequency = metrics.MpipuclkFrequency; memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], @@ -522,6 +554,14 @@ static ssize_t smu_v14_0_0_get_gpu_metrics(struct smu_context *smu, gpu_metrics->current_core_maxfreq = metrics.InfrastructureCpuMaxFreq; gpu_metrics->current_gfx_maxfreq = metrics.InfrastructureGfxMaxFreq; + gpu_metrics->throttle_residency_prochot = metrics.ThrottleResidency_PROCHOT; + gpu_metrics->throttle_residency_spl = metrics.ThrottleResidency_SPL; + gpu_metrics->throttle_residency_fppt = metrics.ThrottleResidency_FPPT; + gpu_metrics->throttle_residency_sppt = metrics.ThrottleResidency_SPPT; + gpu_metrics->throttle_residency_thm_core = metrics.ThrottleResidency_THM_CORE; + gpu_metrics->throttle_residency_thm_gfx = metrics.ThrottleResidency_THM_GFX; + gpu_metrics->throttle_residency_thm_soc = metrics.ThrottleResidency_THM_SOC; + gpu_metrics->time_filter_alphavalue = metrics.FilterAlphaValue; gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); From 37c57631c18661c4c0dc415e75afd143ed89e098 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 2023 10:17:57 +0800 Subject: [PATCH 06/14] drm/amd/pm: support new mca smu error code decoding support new mca smu error code decoding from smu 85.86.0 for smu v13.0.6 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 2b488fcf2f95..e51e8918e667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -46,6 +46,8 @@ #define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) #define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) +#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 0e5a77c3c2e2..900a2d9e6d85 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2593,13 +2593,20 @@ static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { + struct smu_context *smu = adev->powerplay.pp_handle; uint32_t errcode, instlo; instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); if (instlo != 0x03b30400) return false; - errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + if (!(adev->flags & AMD_IS_APU) && smu->smc_fw_version >= 0x00555600) { + errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]); + errcode &= 0xff; + } else { + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + } + return mca_smu_check_error_code(adev, mca_ras, errcode); } From 0e8af20517197934cc04f8e361c6bbe198c327fd Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 20 Nov 2023 10:43:02 +0800 Subject: [PATCH 07/14] drm/amdgpu: Update fw version for boot time error query Boot time error query is not available until fw a10109 Signed-off-by: Hawking Zhang Reviewed-by: Stanley Yang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 3cf4684d0d3f..5f46877f78cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -821,7 +821,7 @@ static int psp_v13_0_query_boot_status(struct psp_context *psp) if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) return 0; - if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007) + if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109) return 0; for_each_inst(i, inst_mask) { From dbf3850d12baf3ba8a80c302f538d1b01940aef7 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 4 Dec 2023 10:44:32 +0800 Subject: [PATCH 08/14] drm/amdgpu: optimize the printing order of error data sort error data list to optimize the printing order. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a3dc68e98910..63fb4cd85e53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_ras.h" @@ -3665,6 +3666,21 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void) return err_node; } +static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b) +{ + struct ras_err_node *nodea = container_of(a, struct ras_err_node, node); + struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node); + struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info; + struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info; + + if (unlikely(infoa->socket_id != infob->socket_id)) + return infoa->socket_id - infob->socket_id; + else + return infoa->die_id - infob->die_id; + + return 0; +} + static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info) { @@ -3682,6 +3698,7 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); + list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); return &err_node->err_info; } From 5b750b22530fe53bf7fd6a30baacd53ada26911b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 30 Nov 2023 17:34:07 -0500 Subject: [PATCH 09/14] drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml Does the same thing as: commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") Reviewed-by: Harry Wentland Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302107.hUDXVyWT-lkp@intel.com/ Fixes: 67e38874b85b ("drm/amd/display: Increase num voltage states to 40") Signed-off-by: Alex Deucher Cc: Alvin Lee Cc: Hamza Mahfooz Cc: Samson Tam Cc: Harry Wentland --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index ea7d60f9a9b4..6042a5a6a44f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -61,8 +61,12 @@ endif endif ifneq ($(CONFIG_FRAME_WARN),0) +ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) +frame_warn_flag := -Wframe-larger-than=3072 +else frame_warn_flag := -Wframe-larger-than=2048 endif +endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) From 6fce23a4d8c5f93bf80b7f122449fbb97f1e40dd Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 18:06:55 +0530 Subject: [PATCH 10/14] drm/amdgpu: Restrict extended wait to PSP v13.0.6 Only PSPv13.0.6 SOCs take a longer time to reach steady state. Other PSPv13 based SOCs don't need extended wait. Also, reduce PSPv13.0.6 wait time. Cc: stable@vger.kernel.org Fixes: fc5988907156 ("drm/amdgpu: update retry times for psp vmbx wait") Fixes: d8c1925ba8cd ("drm/amdgpu: update retry times for psp BL wait") Link: https://lore.kernel.org/amd-gfx/34dd4c66-f7bf-44aa-af8f-c82889dd652c@amd.com/ Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 5f46877f78cf..df1844d0800f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); #define GFX_CMD_USB_PD_USE_LFB 0x480 /* Retry times for vmbx ready wait */ -#define PSP_VMBX_POLLING_LIMIT 20000 +#define PSP_VMBX_POLLING_LIMIT 3000 /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 @@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int retry_loop, ret; + int retry_loop, retry_cnt, ret; + retry_cnt = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ? + PSP_VMBX_POLLING_LIMIT : + 10; /* Wait for bootloader to signify that it is ready having bit 31 of * C2PMSG_35 set to 1. All other bits are expected to be cleared. * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { + for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); From 81577503efb49f4ad76af22f9941d72900ef4aab Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 12:37:34 +0530 Subject: [PATCH 11/14] drm/amdgpu: Add NULL checks for function pointers Check if function is implemented before making the call. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c82776e5e9aa..edd6344d1e2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1423,9 +1423,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); + if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state) + adev->nbio.funcs->get_clockgating_state(adev, flags); - adev->hdp.funcs->get_clock_gating_state(adev, flags); + if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) + adev->hdp.funcs->get_clock_gating_state(adev, flags); if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { /* AMD_CG_SUPPORT_DRM_MGCG */ @@ -1440,9 +1442,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) } /* AMD_CG_SUPPORT_ROM_MGCG */ - adev->smuio.funcs->get_clock_gating_state(adev, flags); + if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state) + adev->smuio.funcs->get_clock_gating_state(adev, flags); - adev->df.funcs->get_clockgating_state(adev, flags); + if (adev->df.funcs && adev->df.funcs->get_clockgating_state) + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, From 555e39f0270b1a1c51224044be9922b4c3a4c27f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 15:58:21 +0530 Subject: [PATCH 12/14] drm/amdgpu: Update HDP 4.4.2 clock gating flags HDP 4.4.2 clockgating is enabled by default, update the flags accordingly. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 49e934975719..4db6bb73ead4 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, { int data; + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* Default enabled */ + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + return; + } /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) From 27b024a88acba17c8e3a71ff4fd425064851e3b7 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 30 Nov 2023 16:35:58 +0530 Subject: [PATCH 13/14] drm/amdgpu: Avoid querying DRM MGCG status MP0 v13.0.6 SOCs don't support DRM MGCG. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index edd6344d1e2b..51342809af03 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1429,7 +1429,8 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) adev->hdp.funcs->get_clock_gating_state(adev, flags); - if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); if (!(data & 0x01000000)) From dab96d8b61aab1a4f99d0b86964a6c40e7bb1756 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 29 Nov 2023 15:44:25 -0500 Subject: [PATCH 14/14] drm/amdgpu: fix buffer funcs setting order on suspend We need to disable this after the last eviction call, but before we disable the SDMA IP. Fixes: b70438004a14 ("drm/amdgpu: move buffer funcs setting up a level") Link: https://lore.kernel.org/r/87edgv4x3i.fsf@vps.thesusis.net Reviewed-by: Luben Tuikov Tested-by: Phillip Susi Signed-off-by: Alex Deucher Cc: Phillip Susi Cc: Luben Tuikov --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cc1ae2743c1..1f64d8cbb14d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4527,6 +4527,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (r) return r; + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev);