From c154a96b550b66bdf032216cab613269cb91e79e Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Fri, 25 Jul 2025 10:47:35 +0800 Subject: [PATCH 01/67] drm/amdgpu: load RAS bad page from PMFW in page retirement In legacy way, bad page is queried from MCA registers, switch to getting it from PMFW when PMFW manages eeprom data. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 131 +++++++++++++++--------- 2 files changed, 87 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 055a9bbabbdb..36a5393d6b74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3300,7 +3300,13 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs; + if (amdgpu_ras_smu_eeprom_supported(adev)) + unit_num = control->ras_num_recs - + control->ras_num_recs_old; + else + unit_num = data->count / adev->umc.retire_unit - + control->ras_num_recs; + save_count = con->bad_page_num - control->ras_num_bad_pages; mutex_unlock(&con->recovery_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2e039fb778ea..3eb252de343b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -96,67 +96,96 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control = &con->eeprom_control; unsigned int error_query_mode; int ret = 0; unsigned long err_count; amdgpu_ras_get_error_query_mode(adev, &error_query_mode); + err_data->err_addr = + 
kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if (!err_data->err_addr) + dev_warn(adev->dev, + "Failed to alloc memory for umc error address record!\n"); + else + err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; + mutex_lock(&con->page_retirement_lock); - ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); - if (ret == -EOPNOTSUPP && - error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { - if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && - adev->umc.ras->ras_block.hw_ops->query_ras_error_count) - adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); + if (!amdgpu_ras_smu_eeprom_supported(adev)) { + ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); + if (ret == -EOPNOTSUPP && + error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_count) + adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, + ras_error_status); - if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && - adev->umc.ras->ras_block.hw_ops->query_ras_error_address && - adev->umc.max_ras_err_cnt_per_query) { - err_data->err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); - /* still call query_ras_error_address to clear error status - * even NOMEM error is encountered - */ - if(!err_data->err_addr) - dev_warn(adev->dev, "Failed to alloc memory for " - "umc error address record!\n"); - else - err_data->err_addr_len = 
adev->umc.max_ras_err_cnt_per_query; + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if (!err_data->err_addr) + dev_warn(adev->dev, + "Failed to alloc memory for umc error address record!\n"); + else + err_data->err_addr_len = + adev->umc.max_ras_err_cnt_per_query; - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, + ras_error_status); + } + } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY || + (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) { + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_count) + adev->umc.ras->ecc_info_query_ras_error_count(adev, + ras_error_status); + + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if (!err_data->err_addr) + dev_warn(adev->dev, + "Failed to alloc memory for umc error address record!\n"); + else + err_data->err_addr_len = + adev->umc.max_ras_err_cnt_per_query; + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.ras->ecc_info_query_ras_error_address(adev, + ras_error_status); + } } - } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY || - (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) { - if (adev->umc.ras && - adev->umc.ras->ecc_info_query_ras_error_count) - adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status); - - if (adev->umc.ras && - 
adev->umc.ras->ecc_info_query_ras_error_address && - adev->umc.max_ras_err_cnt_per_query) { - err_data->err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); - - /* still call query_ras_error_address to clear error status - * even NOMEM error is encountered - */ - if(!err_data->err_addr) - dev_warn(adev->dev, "Failed to alloc memory for " - "umc error address record!\n"); - else - err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; - - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status); + } else { + if (!amdgpu_ras_eeprom_update_record_num(control)) { + err_data->err_addr_cnt = err_data->de_count = + control->ras_num_recs - control->ras_num_recs_old; + amdgpu_ras_eeprom_read_idx(control, err_data->err_addr, + control->ras_num_recs_old, err_data->de_count); } } @@ -166,7 +195,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if ((amdgpu_bad_page_threshold != 0) && err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, - err_data->err_addr_cnt, false); + err_data->err_addr_cnt, amdgpu_ras_smu_eeprom_supported(adev)); amdgpu_ras_save_bad_pages(adev, &err_count); amdgpu_dpm_send_hbm_bad_pages_num(adev, From 815e260a18a3af4dab59025ee99a7156c0e8b5e0 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 6 Nov 2025 13:49:18 +0800 Subject: [PATCH 02/67] drm/amd/display: add macros to simplify code [Why & How] Adding macros to simplify the process of adding new error codes. Currently, to add an error code, the developer needs to add both the enum and the string translation. This is error prone and can lead to inconsistencies. The refactor adds a macro to automatically add the string translation based on the enum. 
Reviewed-by: Aric Cyr Signed-off-by: Wenjing Liu Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/modules/hdcp/hdcp_log.c | 125 +---------------- .../drm/amd/display/modules/inc/mod_hdcp.h | 126 +++++++++--------- 2 files changed, 68 insertions(+), 183 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c index ac44ee1532fd..409a7d0e70fa 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c @@ -125,131 +125,12 @@ void mod_hdcp_log_ddc_trace(struct mod_hdcp *hdcp) } } +#define CASE_FORMAT(entry) case entry: return #entry; + char *mod_hdcp_status_to_str(int32_t status) { switch (status) { - case MOD_HDCP_STATUS_SUCCESS: - return "MOD_HDCP_STATUS_SUCCESS"; - case MOD_HDCP_STATUS_FAILURE: - return "MOD_HDCP_STATUS_FAILURE"; - case MOD_HDCP_STATUS_RESET_NEEDED: - return "MOD_HDCP_STATUS_RESET_NEEDED"; - case MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND: - return "MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND"; - case MOD_HDCP_STATUS_DISPLAY_NOT_FOUND: - return "MOD_HDCP_STATUS_DISPLAY_NOT_FOUND"; - case MOD_HDCP_STATUS_INVALID_STATE: - return "MOD_HDCP_STATUS_INVALID_STATE"; - case MOD_HDCP_STATUS_NOT_IMPLEMENTED: - return "MOD_HDCP_STATUS_NOT_IMPLEMENTED"; - case MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE: - return "MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE"; - case MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE: - return "MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE"; - case MOD_HDCP_STATUS_CREATE_PSP_SERVICE_FAILURE: - return "MOD_HDCP_STATUS_CREATE_PSP_SERVICE_FAILURE"; - case MOD_HDCP_STATUS_DESTROY_PSP_SERVICE_FAILURE: - return "MOD_HDCP_STATUS_DESTROY_PSP_SERVICE_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE"; - case 
MOD_HDCP_STATUS_HDCP1_VALIDATE_ENCRYPTION_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_VALIDATE_ENCRYPTION_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_NOT_HDCP_REPEATER: - return "MOD_HDCP_STATUS_HDCP1_NOT_HDCP_REPEATER"; - case MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE: - return "MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE"; - case MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING: - return "MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING"; - case MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_BKSV_REVOKED: - return "MOD_HDCP_STATUS_HDCP1_BKSV_REVOKED"; - case MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY: - return "MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY"; - case MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_KSV_LIST_REVOKED: - return "MOD_HDCP_STATUS_HDCP1_KSV_LIST_REVOKED"; - case MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED: - return "MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED"; - case MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE: - return "MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE"; - case MOD_HDCP_STATUS_HDCP1_INVALID_BKSV: - return "MOD_HDCP_STATUS_HDCP1_INVALID_BKSV"; - case MOD_HDCP_STATUS_DDC_FAILURE: - return 
"MOD_HDCP_STATUS_DDC_FAILURE"; - case MOD_HDCP_STATUS_INVALID_OPERATION: - return "MOD_HDCP_STATUS_INVALID_OPERATION"; - case MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE: - return "MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE"; - case MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING: - return "MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING"; - case MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING: - return "MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING"; - case MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING: - return "MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING"; - case MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED: - return "MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED"; - case MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING: - return "MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING"; - case MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED: - return 
"MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED"; - case MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY: - return "MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY"; - case MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING: - return "MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING"; - case MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST: - return "MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST"; - case MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE"; - case MOD_HDCP_STATUS_UNSUPPORTED_PSP_VER_FAILURE: - return "MOD_HDCP_STATUS_UNSUPPORTED_PSP_VER_FAILURE"; - case MOD_HDCP_STATUS_HDCP2_LOCALITY_COMBO_READ_FAILURE: - return "MOD_HDCP_STATUS_HDCP2_LOCALITY_COMBO_READ_FAILURE"; + MOD_HDCP_STATUS_LIST(CASE_FORMAT) default: return "MOD_HDCP_STATUS_UNKNOWN"; } diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index 264348989e9b..835467225458 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -35,70 +35,74 @@ struct mod_hdcp; #define MAX_NUM_OF_DISPLAYS 6 #define MAX_NUM_OF_ATTEMPTS 4 #define MAX_NUM_OF_ERROR_TRACE 10 +#define MOD_HDCP_STATUS_LIST(FORMAT) \ + FORMAT(MOD_HDCP_STATUS_SUCCESS) \ + FORMAT(MOD_HDCP_STATUS_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_RESET_NEEDED) \ + FORMAT(MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND) \ + FORMAT(MOD_HDCP_STATUS_DISPLAY_NOT_FOUND) \ + FORMAT(MOD_HDCP_STATUS_INVALID_STATE) \ + 
FORMAT(MOD_HDCP_STATUS_NOT_IMPLEMENTED) \ + FORMAT(MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_CREATE_PSP_SERVICE_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_DESTROY_PSP_SERVICE_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_VALIDATE_ENCRYPTION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_NOT_HDCP_REPEATER) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_BKSV_REVOKED) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_KSV_LIST_REVOKED) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP1_INVALID_BKSV) \ + FORMAT(MOD_HDCP_STATUS_DDC_FAILURE) /* TODO: specific errors */ \ + FORMAT(MOD_HDCP_STATUS_INVALID_OPERATION) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED) \ + 
FORMAT(MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_UNSUPPORTED_PSP_VER_FAILURE) \ + FORMAT(MOD_HDCP_STATUS_HDCP2_LOCALITY_COMBO_READ_FAILURE) + +#define ENUM_FORMAT(entry) entry, /* detailed return status */ enum mod_hdcp_status { - MOD_HDCP_STATUS_SUCCESS = 0, - MOD_HDCP_STATUS_FAILURE, - MOD_HDCP_STATUS_RESET_NEEDED, - MOD_HDCP_STATUS_DISPLAY_OUT_OF_BOUND, - MOD_HDCP_STATUS_DISPLAY_NOT_FOUND, - MOD_HDCP_STATUS_INVALID_STATE, - MOD_HDCP_STATUS_NOT_IMPLEMENTED, - MOD_HDCP_STATUS_INTERNAL_POLICY_FAILURE, - MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE, - MOD_HDCP_STATUS_CREATE_PSP_SERVICE_FAILURE, - MOD_HDCP_STATUS_DESTROY_PSP_SERVICE_FAILURE, - MOD_HDCP_STATUS_HDCP1_CREATE_SESSION_FAILURE, - MOD_HDCP_STATUS_HDCP1_DESTROY_SESSION_FAILURE, - MOD_HDCP_STATUS_HDCP1_VALIDATE_ENCRYPTION_FAILURE, - MOD_HDCP_STATUS_HDCP1_NOT_HDCP_REPEATER, - MOD_HDCP_STATUS_HDCP1_NOT_CAPABLE, - MOD_HDCP_STATUS_HDCP1_R0_PRIME_PENDING, - MOD_HDCP_STATUS_HDCP1_VALIDATE_RX_FAILURE, - MOD_HDCP_STATUS_HDCP1_BKSV_REVOKED, - 
MOD_HDCP_STATUS_HDCP1_KSV_LIST_NOT_READY, - MOD_HDCP_STATUS_HDCP1_VALIDATE_KSV_LIST_FAILURE, - MOD_HDCP_STATUS_HDCP1_KSV_LIST_REVOKED, - MOD_HDCP_STATUS_HDCP1_ENABLE_ENCRYPTION_FAILURE, - MOD_HDCP_STATUS_HDCP1_ENABLE_STREAM_ENCRYPTION_FAILURE, - MOD_HDCP_STATUS_HDCP1_MAX_CASCADE_EXCEEDED_FAILURE, - MOD_HDCP_STATUS_HDCP1_MAX_DEVS_EXCEEDED_FAILURE, - MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE, - MOD_HDCP_STATUS_HDCP1_LINK_INTEGRITY_FAILURE, - MOD_HDCP_STATUS_HDCP1_REAUTH_REQUEST_ISSUED, - MOD_HDCP_STATUS_HDCP1_LINK_MAINTENANCE_FAILURE, - MOD_HDCP_STATUS_HDCP1_INVALID_BKSV, - MOD_HDCP_STATUS_DDC_FAILURE, /* TODO: specific errors */ - MOD_HDCP_STATUS_INVALID_OPERATION, - MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE, - MOD_HDCP_STATUS_HDCP2_CREATE_SESSION_FAILURE, - MOD_HDCP_STATUS_HDCP2_DESTROY_SESSION_FAILURE, - MOD_HDCP_STATUS_HDCP2_PREP_AKE_INIT_FAILURE, - MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING, - MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING, - MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING, - MOD_HDCP_STATUS_HDCP2_VALIDATE_AKE_CERT_FAILURE, - MOD_HDCP_STATUS_HDCP2_AKE_CERT_REVOKED, - MOD_HDCP_STATUS_HDCP2_VALIDATE_H_PRIME_FAILURE, - MOD_HDCP_STATUS_HDCP2_VALIDATE_PAIRING_INFO_FAILURE, - MOD_HDCP_STATUS_HDCP2_PREP_LC_INIT_FAILURE, - MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING, - MOD_HDCP_STATUS_HDCP2_VALIDATE_L_PRIME_FAILURE, - MOD_HDCP_STATUS_HDCP2_PREP_EKS_FAILURE, - MOD_HDCP_STATUS_HDCP2_ENABLE_ENCRYPTION_FAILURE, - MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY, - MOD_HDCP_STATUS_HDCP2_VALIDATE_RX_ID_LIST_FAILURE, - MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_REVOKED, - MOD_HDCP_STATUS_HDCP2_ENABLE_STREAM_ENCRYPTION_FAILURE, - MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING, - MOD_HDCP_STATUS_HDCP2_VALIDATE_STREAM_READY_FAILURE, - MOD_HDCP_STATUS_HDCP2_PREPARE_STREAM_MANAGEMENT_FAILURE, - MOD_HDCP_STATUS_HDCP2_REAUTH_REQUEST, - MOD_HDCP_STATUS_HDCP2_REAUTH_LINK_INTEGRITY_FAILURE, - MOD_HDCP_STATUS_HDCP2_DEVICE_COUNT_MISMATCH_FAILURE, - MOD_HDCP_STATUS_UNSUPPORTED_PSP_VER_FAILURE, - 
MOD_HDCP_STATUS_HDCP2_LOCALITY_COMBO_READ_FAILURE, + MOD_HDCP_STATUS_LIST(ENUM_FORMAT) }; struct mod_hdcp_displayport { From 9dff2bb709e6fbd97e263fd12bf12802d2b5a0cf Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Thu, 6 Nov 2025 12:35:53 -0500 Subject: [PATCH 03/67] drm/amdgpu: disable peer-to-peer access for DCC-enabled GC12 VRAM surfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain multi-GPU configurations (especially GFX12) may hit data corruption when a DCC-compressed VRAM surface is shared across GPUs using peer-to-peer (P2P) DMA transfers. Such surfaces rely on device-local metadata and cannot be safely accessed through a remote GPU’s page tables. Attempting to import a DCC-enabled surface through P2P leads to incorrect rendering or GPU faults. This change disables P2P for DCC-enabled VRAM buffers that are contiguous and allocated on GFX12+ hardware. In these cases, the importer falls back to the standard system-memory path, avoiding invalid access to compressed surfaces. Future work could consider optional migration (VRAM→System→VRAM) if a performance regression is observed when `attach->peer2peer = false`. Tested on: - Dual RX 9700 XT (Navi4x) setup - GNOME and Wayland compositor scenarios - Confirmed no corruption after disabling P2P under these conditions v2: Remove check TTM_PL_VRAM & TTM_PL_FLAG_CONTIGUOUS. 
v3: simplify for upstream and fix ip version check (Alex) Suggested-by: Christian König Signed-off-by: Vitaly Prosyak Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 268d69d862e0..c1461317eb29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -83,6 +83,18 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); int r; + /* + * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+. + * Such buffers cannot be safely accessed over P2P due to device-local + * compression metadata. Fallback to system-memory path instead. + * Device supports GFX12 (GC 12.x or newer) + * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag + * + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) && + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + attach->peer2peer = false; + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; From 50d9ebd66ea1393412e341c3a44706d80b38d872 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 6 Nov 2025 14:55:27 -0600 Subject: [PATCH 04/67] drm/amd: Clarify that amdgpu.audio only works for non-DC The comment already explains it but the module parameter help text doesn't.
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4684 Signed-off-by: Mario Limonciello (AMD) Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index cb522d6272d6..ef23acaf5a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -312,7 +312,7 @@ module_param_named(moverate, amdgpu_moverate, int, 0600); * DOC: audio (int) * Set HDMI/DPAudio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disabled it. */ -MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)"); +MODULE_PARM_DESC(audio, "HDMI/DP Audio enable for non DC displays (-1 = auto, 0 = disable, 1 = enable)"); module_param_named(audio, amdgpu_audio, int, 0444); /** From e84835940e60a7d5263767ee92acc08f9877cb26 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 27 Aug 2025 15:48:06 +0800 Subject: [PATCH 05/67] drm/amdgpu: get RAS bad page address from MCA address Instead of from physical address. 
v2: add comment to make the code more readable Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 15 ++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 4 ++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 36a5393d6b74..9e2e098af86c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3014,8 +3014,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev, addr_in.ma.err_addr = bps->address; addr_in.ma.socket_id = socket; addr_in.ma.ch_inst = bps->mem_channel; - /* tell RAS TA the node instance is not used */ - addr_in.ma.node_inst = TA_RAS_INV_NODE; + if (!amdgpu_ras_smu_eeprom_supported(adev)) { + /* tell RAS TA the node instance is not used */ + addr_in.ma.node_inst = TA_RAS_INV_NODE; + } else { + addr_in.ma.umc_inst = bps->mcumc_id; + addr_in.ma.node_inst = bps->cu; + } if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) ret = adev->umc.ras->convert_ras_err_addr(adev, err_data, @@ -3162,7 +3167,11 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); } else { - save_nps = nps; + /* if pmfw manages eeprom, save_nps is not stored on eeprom, + * we should always convert mca address into physical address, + * make save_nps different from nps + */ + save_nps = nps + 1; } if (save_nps == nps) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 670c0dedf4e9..ec248ca6ef93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1022,9 +1022,9 @@ int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control, record[i - rec_idx].retired_page = 
0x1ULL; record[i - rec_idx].ts = ts; record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - record[i - rec_idx].cu = 0; - adev->umc.ras->mca_ipid_parse(adev, ipid, NULL, + adev->umc.ras->mca_ipid_parse(adev, ipid, + (uint32_t *)&(record[i - rec_idx].cu), (uint32_t *)&(record[i - rec_idx].mem_channel), (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL); } From 42993bcf1c8d773d783a613d2d4e9172c16b4926 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Thu, 6 Nov 2025 23:47:29 +0800 Subject: [PATCH 06/67] drm/amd/pm: Add NULL check for power limit Add NULL check for smu power limit pointer v2: Update error code on failure (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index c0e7c45ac0e6..14351ec70701 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2907,6 +2907,9 @@ int smu_get_power_limit(void *handle, if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; + if (!limit) + return -EINVAL; + switch (pp_power_type) { case PP_PWR_TYPE_SUSTAINED: limit_type = SMU_DEFAULT_PPT_LIMIT; From 37cdb89c0a5940e41682e1f89a3ed88990a94887 Mon Sep 17 00:00:00 2001 From: Ahmad Rehman Date: Wed, 5 Nov 2025 09:48:08 -0500 Subject: [PATCH 07/67] drm/amdkfd: Fixing the clang format This patch fixes the formatting in the patch "amdkfd: Do not wait for queue op response during reset" Signed-off-by: Ahmad Rehman Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0904c36192c7..d7a2e7178ea9 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2095,7 +2095,7 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, while (*fence_addr != fence_value) { /* Fatal err detected, this response won't come */ if (amdgpu_amdkfd_is_fed(dqm->dev->adev) || - amdgpu_in_reset(dqm->dev->adev)) + amdgpu_in_reset(dqm->dev->adev)) return -EIO; if (time_after(jiffies, end_jiffies)) { From ec49374ccb8da86b465beaf09c367f3dfd648d8f Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 4 Nov 2025 10:42:45 +0100 Subject: [PATCH 08/67] drm/amdgpu: jump to the correct label on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_sched_entity_init wasn't called yet, so the only thing to do is to release allocated memory. This doesn't fix any bug since entity is zero allocated and drm_sched_entity_fini does nothing in this case. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f5d5c45ddc0d..afedea02188d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -236,7 +236,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv, &num_scheds, &scheds); if (r) - goto cleanup_entity; + goto error_free_entity; } /* disable load balance if the hw engine retains context among dependent jobs */ From 334b27bf712b5ddd19908aba318175e4b9bcf839 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 27 Aug 2025 19:33:02 +0800 Subject: [PATCH 09/67] drm/amdgpu: try for more times if RAS bad page number is not updated RAS info update in PMFW is time cost, wait for it. 
Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index ec248ca6ef93..01b38a6e198e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -874,13 +874,33 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control) { struct amdgpu_device *adev = to_amdgpu_device(control); + int ret, timeout = 1000; if (!amdgpu_ras_smu_eeprom_supported(adev)) return 0; control->ras_num_recs_old = control->ras_num_recs; - return amdgpu_ras_smu_get_badpage_count(adev, + + do { + ret = amdgpu_ras_smu_get_badpage_count(adev, &(control->ras_num_recs), 12); + if (!ret && + (control->ras_num_recs_old == control->ras_num_recs)) { + /* record number update in PMFW needs some time */ + msleep(50); + timeout -= 50; + } else { + break; + } + } while (timeout); + + /* no update of record number is not a real failure, + * don't print warning here + */ + if (!ret && (control->ras_num_recs_old == control->ras_num_recs)) + ret = -EINVAL; + + return ret; } /** From ded3ad780cf97a04927773c4600823b84f7f3cc2 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Fri, 24 Oct 2025 16:09:25 +0800 Subject: [PATCH 10/67] drm/amdgpu: fix lock warning in amdgpu_userq_fence_driver_process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a potential deadlock caused by inconsistent spinlock usage between interrupt and process contexts in the userq fence driver. 
The issue occurs when amdgpu_userq_fence_driver_process() is called from both: - Interrupt context: gfx_v11_0_eop_irq() -> amdgpu_userq_fence_driver_process() - Process context: amdgpu_eviction_fence_suspend_worker() -> amdgpu_userq_fence_driver_force_completion() -> amdgpu_userq_fence_driver_process() In interrupt context, the spinlock was acquired without disabling interrupts, leaving it in {IN-HARDIRQ-W} state. When the same lock is acquired in process context, the kernel detects inconsistent locking since the process context acquisition would enable interrupts while holding a lock previously acquired in interrupt context. Kernel log shows: [ 4039.310790] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ 4039.310804] kworker/7:2/409 [HC0[0]:SC0[0]:HE1:SE1] takes: [ 4039.310818] ffff9284e1bed000 (&fence_drv->fence_list_lock){?...}-{3:3}, [ 4039.310993] {IN-HARDIRQ-W} state was registered at: [ 4039.311004] lock_acquire+0xc6/0x300 [ 4039.311018] _raw_spin_lock+0x39/0x80 [ 4039.311031] amdgpu_userq_fence_driver_process.part.0+0x30/0x180 [amdgpu] [ 4039.311146] amdgpu_userq_fence_driver_process+0x17/0x30 [amdgpu] [ 4039.311257] gfx_v11_0_eop_irq+0x132/0x170 [amdgpu] Fix by using spin_lock_irqsave()/spin_unlock_irqrestore() to properly manage interrupt state regardless of calling context. 
Reviewed-by: Christian König Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 99ae1d19b751..eba9fb359047 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -151,15 +151,16 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d { struct amdgpu_userq_fence *userq_fence, *tmp; struct dma_fence *fence; + unsigned long flags; u64 rptr; int i; if (!fence_drv) return; + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); - spin_lock(&fence_drv->fence_list_lock); list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { fence = &userq_fence->base; @@ -174,7 +175,7 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d list_del(&userq_fence->link); dma_fence_put(fence); } - spin_unlock(&fence_drv->fence_list_lock); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); } void amdgpu_userq_fence_driver_destroy(struct kref *ref) From eed30152746ec1d8b6e8ab31e349f1eb8d8bd666 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 24 Sep 2025 17:52:24 +0800 Subject: [PATCH 11/67] drm/amdgpu: add RAS bad page threshold handling for PMFW manages eeprom Check if bad page threshold is reached and take actions accordingly. v2: remove rma message sent to smu when pmfw manages eeprom. v3: add null pointer check for con. 
Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 01b38a6e198e..99aa1908833d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -903,6 +903,33 @@ int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *contro return ret; } +static int amdgpu_ras_smu_eeprom_append(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!amdgpu_ras_smu_eeprom_supported(adev) || !con) + return 0; + + control->ras_num_bad_pages = con->bad_page_num; + + if (amdgpu_bad_page_threshold != 0 && + control->ras_num_bad_pages > con->bad_page_cnt_threshold) { + dev_warn(adev->dev, + "Saved bad pages %d reaches threshold value %d\n", + control->ras_num_bad_pages, con->bad_page_cnt_threshold); + + if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev)) + dev_warn(adev->dev, "fail to generate bad page threshold cper records\n"); + + if ((amdgpu_bad_page_threshold != -1) && + (amdgpu_bad_page_threshold != -2)) + con->is_rma = true; + } + + return 0; +} + /** * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table * @control: pointer to control structure @@ -921,17 +948,14 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int res, i; uint64_t nps = AMDGPU_NPS1_PARTITION_MODE; - if (!__is_ras_eeprom_supported(adev) || !con) + if (!__is_ras_eeprom_supported(adev)) return 0; - if (amdgpu_ras_smu_eeprom_supported(adev)) { - control->ras_num_bad_pages = 
con->bad_page_num; - return 0; - } + if (amdgpu_ras_smu_eeprom_supported(adev)) + return amdgpu_ras_smu_eeprom_append(control); if (num == 0) { dev_err(adev->dev, "will not append 0 records\n"); From 7fb41ab3c94828ad48e1a6d2237e8a7e682c74b9 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 6 Nov 2025 16:26:56 +0800 Subject: [PATCH 12/67] drm/amdgpu: optimize timeout implementation in ras_eeprom_update_record_num The busy status returned by ras_eeprom_update_record_num may not be an error, increase timeout to exclude false busy status. Also add more comments to make the code readable. v2: define a macro for the timeout value. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 99aa1908833d..64dd7a81bff5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -124,6 +124,8 @@ RAS_TABLE_V2_1_INFO_SIZE) \ / RAS_TABLE_RECORD_SIZE) +#define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */ + /* Given a zero-based index of an EEPROM RAS record, yields the EEPROM * offset off of RAS_TABLE_START. 
That is, this is something you can * add to control->i2c_address, and then tell I2C layer to read @@ -874,7 +876,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control) { struct amdgpu_device *adev = to_amdgpu_device(control); - int ret, timeout = 1000; + int ret, retry = 20; if (!amdgpu_ras_smu_eeprom_supported(adev)) return 0; @@ -882,17 +884,23 @@ int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *contro control->ras_num_recs_old = control->ras_num_recs; do { + /* 1000ms timeout is long enough, smu_get_badpage_count won't + * return -EBUSY before timeout. + */ ret = amdgpu_ras_smu_get_badpage_count(adev, - &(control->ras_num_recs), 12); + &(control->ras_num_recs), RAS_SMU_MESSAGE_TIMEOUT_MS); if (!ret && (control->ras_num_recs_old == control->ras_num_recs)) { - /* record number update in PMFW needs some time */ + /* record number update in PMFW needs some time, + * smu_get_badpage_count may return immediately without + * count update, sleep for a while and retry again. + */ msleep(50); - timeout -= 50; + retry--; } else { break; } - } while (timeout); + } while (retry); /* no update of record number is not a real failure, * don't print warning here From 0ea8176ce6a912ff7f4ab9a218ef61552230b472 Mon Sep 17 00:00:00 2001 From: Gangliang Xie Date: Thu, 6 Nov 2025 11:06:21 +0800 Subject: [PATCH 13/67] drm/amd/pm: remove unnecessary prints for smu busy smu busy is a normal case when calling SMU_MSG_GetBadPageCount, so no need to print error status each time. Instead, only print error status when the timeout given by the user is reached.
Signed-off-by: Gangliang Xie Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 0ce8cff27bf9..fc580800609c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -956,6 +956,8 @@ static int smu_v13_0_12_get_badpage_count(struct amdgpu_device *adev, uint32_t * now = (uint64_t)ktime_to_ms(ktime_get()); } while (now < end); + dev_err(adev->dev, + "smu get bad page count timeout!\n"); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 3b98065dac1d..4040ff926544 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -164,9 +164,13 @@ static void __smu_cmn_reg_print_error(struct smu_context *smu, msg_index, param, message); break; case SMU_RESP_BUSY_OTHER: - dev_err_ratelimited(adev->dev, - "SMU: I'm very busy for your command: index:%d param:0x%08X message:%s", - msg_index, param, message); + /* It is normal for SMU_MSG_GetBadPageCount to return busy + * so don't print error at this case. 
+ */ + if (msg != SMU_MSG_GetBadPageCount) + dev_err_ratelimited(adev->dev, + "SMU: I'm very busy for your command: index:%d param:0x%08X message:%s", + msg_index, param, message); break; case SMU_RESP_DEBUG_END: dev_err_ratelimited(adev->dev, From edcace98fdfe20c485f33cfadabe78fce73cbe36 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 23 Oct 2025 15:26:33 -0400 Subject: [PATCH 14/67] drm/amd/display: Only initialize LSDMA if it is supported in DMU Need to check caps flag to determine whether LSDMA is supported in DMU Reviewed-by: Rafal Ostrowski Signed-off-by: Alvin Lee Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 3 +++ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index fffbf1983143..7b09af1cb306 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -2084,6 +2084,9 @@ bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv) struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; bool result; + if (!dc_dmub_srv->dmub->feature_caps.lsdma_support_in_dmu) + return false; + memset(&cmd, 0, sizeof(cmd)); cmd.cmd_common.header.type = DMUB_CMD__LSDMA; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 9bc512a522e0..e956722209ba 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -721,6 +721,7 @@ struct dmub_feature_caps { uint8_t replay_supported; uint8_t replay_reserved[3]; uint8_t abm_aux_backlight_support; + uint8_t lsdma_support_in_dmu; }; struct dmub_visual_confirm_color { From 8b9eac5e0faebaffc5411505e0df0d00dc09504c Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 27 Oct 2025 12:11:15 -0400 Subject: [PATCH 15/67] drm/amd/display: Fix 
index bug for fill latency [WHY&HOW] This array should be indexed by pstate type followed by plane index. Reviewed-by: Austin Zheng Signed-off-by: Dillon Varone Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index df81bd963bb8..a02e9fd6b5ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -12944,7 +12944,7 @@ void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *displ out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx]; out->vactive_det_fill_delay_us[dml2_pstate_type_uclk] = - (unsigned int)math_ceil(mode_lib->ms.pstate_vactive_det_fill_delay_us[plane_idx][dml2_pstate_type_uclk]); + (unsigned int)math_ceil(mode_lib->ms.pstate_vactive_det_fill_delay_us[dml2_pstate_type_uclk][plane_idx]); } void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index) From 607df8248a011524211ee34850345305a1913f9e Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Thu, 23 Oct 2025 10:03:59 -0400 Subject: [PATCH 16/67] drm/amd/display: Allow VRR params change if unsynced with the stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] When changing resolution (e.g., 4K → FHD) in mirror/clone mode with certain monitors, the monitor blanks and loses connection due to an early exit in vrr_settings_require_update(). 
The function only checks if VRR state, fixed refresh target, or min/max refresh rate range has changed. During mode changes, if the calculated min/max refresh values remain the same even though the stream's v_total changed, the function returns early without updating vrr_params.adjust.v_total_min/max, leaving the monitor's VRR timing parameters unsynced with the new mode, causing it to blank out. [How] Explicitly adjust VRR parameters to the stream's nominal v_total when VRR is supported, but inactive. Fixes: 6d31602a9f57 ("drm/amd/display: more liberal vmin/vmax update for freesync") Reviewed-by: Aurabindo Pillai Signed-off-by: Ivan Lipski Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/modules/freesync/freesync.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index ce421bcddcb0..1aae46d703ba 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -1260,6 +1260,17 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, update_v_total_for_static_ramp( core_freesync, stream, in_out_vrr); } + + /* + * If VRR is inactive, set vtotal min and max to nominal vtotal + */ + if (in_out_vrr->state == VRR_STATE_INACTIVE) { + in_out_vrr->adjust.v_total_min = + mod_freesync_calc_v_total_from_refresh(stream, + in_out_vrr->max_refresh_in_uhz); + in_out_vrr->adjust.v_total_max = in_out_vrr->adjust.v_total_min; + return; + } } unsigned long long mod_freesync_calc_nominal_field_rate( From f64ec952d2cdf260714279cdf619a6b363faba33 Mon Sep 17 00:00:00 2001 From: George Shen Date: Mon, 6 Oct 2025 11:23:31 -0400 Subject: [PATCH 17/67] drm/amd/display: Add interface to capture power feature status for debug logging [Why] The status of various power features is often important information when debugging certain 
issues, such as underflow. This info helps to narrow down the potential sources of errors. [How] Add dc interface to capture power feature enablement status. Reviewed-by: Dillon Varone Signed-off-by: George Shen Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/dc.h | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f519e5893a68..5e79962c5f2b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -6421,6 +6421,13 @@ void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, dc->hwss.get_underflow_debug_data(dc, tg, out_data); } +void dc_get_power_feature_status(struct dc *dc, int primary_otg_inst, + struct power_features *out_data) +{ + out_data->uclk_p_state = dc->current_state->clk_mgr->clks.p_state_change_support; + out_data->fams = dc->current_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching; +} + void dc_log_preos_dmcub_info(const struct dc *dc) { dc_dmub_srv_log_preos_dmcub_info(dc->ctx->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 75b25b2506a8..29805428fe93 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1865,6 +1865,18 @@ struct dc_underflow_debug_data { struct dcn_dccg_reg_state *dccg_reg_state[MAX_PIPES]; }; +struct power_features { + bool ips; + bool rcg; + bool replay; + bool dds; + bool sprs; + bool psr; + bool fams; + bool mpo; + bool uclk_p_state; +}; + /* * Create a new surface with default parameters; */ @@ -2772,4 +2784,6 @@ bool dc_can_clear_cursor_limit(const struct dc *dc); */ void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, struct dc_underflow_debug_data *out_data); +void dc_get_power_feature_status(struct dc *dc, int 
primary_otg_inst, struct power_features *out_data); + #endif /* DC_INTERFACE_H_ */ From 3953a7ba61bd797e59d0ce27c9c51cfac223884a Mon Sep 17 00:00:00 2001 From: Ian Chen Date: Tue, 13 May 2025 16:38:35 +0800 Subject: [PATCH 18/67] drm/amd/display: Add new SMART POWER OLED interfaces [why && how] To optimize power consumption on certain OLED LED panels by sending MaxCLL per frame to TCON Reviewed-by: Aric Cyr Signed-off-by: Ian Chen Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 89 ++++++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 7 ++ 2 files changed, 96 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5e79962c5f2b..3999b5835066 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5977,6 +5977,95 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, return true; } +bool dc_smart_power_oled_enable(const struct dc_link *link, bool enable, uint16_t peak_nits, + uint8_t debug_control, uint16_t fixed_CLL, uint32_t triggerline) +{ + bool status = false; + struct dc *dc = link->ctx->dc; + union dmub_rb_cmd cmd; + uint8_t otg_inst = 0; + unsigned int panel_inst = 0; + struct pipe_ctx *pipe_ctx = NULL; + struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; + int i = 0; + + // get panel_inst + if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + return status; + + // get otg_inst + for (i = 0; i < MAX_PIPES; i++) { + if (res_ctx && + res_ctx->pipe_ctx[i].stream && + res_ctx->pipe_ctx[i].stream->link && + res_ctx->pipe_ctx[i].stream->link == link && + res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { + pipe_ctx = &res_ctx->pipe_ctx[i]; + //TODO: refactor for multi edp support + break; + } + } + + if (pipe_ctx) + otg_inst = pipe_ctx->stream_res.tg->inst; + + // fill in cmd + memset(&cmd, 0, sizeof(cmd)); + + 
cmd.smart_power_oled_enable.header.type = DMUB_CMD__SMART_POWER_OLED; + cmd.smart_power_oled_enable.header.sub_type = DMUB_CMD__SMART_POWER_OLED_ENABLE; + cmd.smart_power_oled_enable.header.payload_bytes = + sizeof(struct dmub_rb_cmd_smart_power_oled_enable_data) - sizeof(struct dmub_cmd_header); + cmd.smart_power_oled_enable.header.ret_status = 1; + cmd.smart_power_oled_enable.data.enable = enable; + cmd.smart_power_oled_enable.data.panel_inst = panel_inst; + cmd.smart_power_oled_enable.data.peak_nits = peak_nits; + cmd.smart_power_oled_enable.data.otg_inst = otg_inst; + cmd.smart_power_oled_enable.data.digfe_inst = link->link_enc->preferred_engine; + cmd.smart_power_oled_enable.data.digbe_inst = link->link_enc->transmitter; + + cmd.smart_power_oled_enable.data.debugcontrol = debug_control; + cmd.smart_power_oled_enable.data.triggerline = triggerline; + cmd.smart_power_oled_enable.data.fixed_max_cll = fixed_CLL; + + // send cmd + status = dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + + return status; +} + +bool dc_smart_power_oled_get_max_cll(const struct dc_link *link, unsigned int *pCurrent_MaxCLL) +{ + struct dc *dc = link->ctx->dc; + union dmub_rb_cmd cmd; + bool status = false; + unsigned int panel_inst = 0; + + // get panel_inst + if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) + return status; + + // fill in cmd + memset(&cmd, 0, sizeof(cmd)); + + cmd.smart_power_oled_getmaxcll.header.type = DMUB_CMD__SMART_POWER_OLED; + cmd.smart_power_oled_getmaxcll.header.sub_type = DMUB_CMD__SMART_POWER_OLED_GETMAXCLL; + cmd.smart_power_oled_getmaxcll.header.payload_bytes = sizeof(cmd.smart_power_oled_getmaxcll.data); + cmd.smart_power_oled_getmaxcll.header.ret_status = 1; + + cmd.smart_power_oled_getmaxcll.data.input.panel_inst = panel_inst; + + // send cmd and wait for reply + status = dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); + + if (status) + *pCurrent_MaxCLL = 
cmd.smart_power_oled_getmaxcll.data.output.current_max_cll; + else + *pCurrent_MaxCLL = 0; + + return status; +} + uint8_t get_link_index_from_dpia_port_index(const struct dc *dc, uint8_t dpia_port_index) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 29805428fe93..6b9e510b1ed5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -2719,6 +2719,13 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, uint32_t link_index, struct aux_payload *payload); +/* + * smart power OLED Interfaces + */ +bool dc_smart_power_oled_enable(const struct dc_link *link, bool enable, uint16_t peak_nits, + uint8_t debug_control, uint16_t fixed_CLL, uint32_t triggerline); +bool dc_smart_power_oled_get_max_cll(const struct dc_link *link, unsigned int *pCurrent_MaxCLL); + /* Get dc link index from dpia port index */ uint8_t get_link_index_from_dpia_port_index(const struct dc *dc, uint8_t dpia_port_index); From d7ef56dbfa2836fd83bdd8a1094b7616d715cc7f Mon Sep 17 00:00:00 2001 From: Mohit Bawa Date: Thu, 23 Oct 2025 10:40:41 -0400 Subject: [PATCH 19/67] drm/amd/display: refactor DSC cap calculation for dcn35 why: dcn35 currently uses a hardcoded DSC display clock value which is incorrect for some asic types. Newer DCN versions retrieve dsc display clock from clk_mgr. The same can be done for dcn35. how: Refactor the DSC cap calculation using pre-existing logic. Handle ODM combine requirements in dc_dsc.c. Replace hardcoded display clock with actual value retrieved from clk_mgr. 
Reviewed-by: Nicholas Kazlauskas Reviewed-by: Charlene Liu Reviewed-by: Wenjing Liu Signed-off-by: Mohit Bawa Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 30 ++++++++++++++++++ .../drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c | 31 ++++++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 35d20a663d67..dfd0c9505af0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -1295,6 +1295,35 @@ static void dcn35_update_clocks_fpga(struct clk_mgr *clk_mgr, dcn35_update_clocks_update_dtb_dto(clk_mgr_int, context, clk_mgr->clks.ref_dtbclk_khz); } +static unsigned int dcn35_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + unsigned int num_clk_levels; + + switch (clk_type) { + case CLK_TYPE_DISPCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + return num_clk_levels ? + clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 : + clk_mgr->base.boot_snapshot.dispclk; + case CLK_TYPE_DPPCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dppclk_levels; + return num_clk_levels ? + clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dppclk_mhz * 1000 : + clk_mgr->base.boot_snapshot.dppclk; + case CLK_TYPE_DSCCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + return num_clk_levels ? 
+ clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 / 3 : + clk_mgr->base.boot_snapshot.dispclk / 3; + default: + break; + } + + return 0; +} + static struct clk_mgr_funcs dcn35_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, @@ -1306,6 +1335,7 @@ static struct clk_mgr_funcs dcn35_funcs = { .set_low_power_state = dcn35_set_low_power_state, .exit_low_power_state = dcn35_exit_low_power_state, .is_ips_supported = dcn35_is_ips_supported, + .get_max_clock_khz = dcn35_get_max_clock_khz, }; struct clk_mgr_funcs dcn35_fpga_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c index f9c6377ac66c..e712985f7abd 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c @@ -28,9 +28,9 @@ #include "reg_helper.h" static void dsc35_enable(struct display_stream_compressor *dsc, int opp_pipe); +static void dsc35_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz); static const struct dsc_funcs dcn35_dsc_funcs = { - .dsc_get_enc_caps = dsc2_get_enc_caps, .dsc_read_state = dsc2_read_state, .dsc_read_reg_state = dsc2_read_reg_state, .dsc_validate_stream = dsc2_validate_stream, @@ -40,6 +40,7 @@ static const struct dsc_funcs dcn35_dsc_funcs = { .dsc_disable = dsc2_disable, .dsc_disconnect = dsc2_disconnect, .dsc_wait_disconnect_pending_clear = dsc2_wait_disconnect_pending_clear, + .dsc_get_single_enc_caps = dsc35_get_single_enc_caps, }; /* Macro definitios for REG_SET macros*/ @@ -111,3 +112,31 @@ void dsc35_set_fgcg(struct dcn20_dsc *dsc20, bool enable) { REG_UPDATE(DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, !enable); } + +void dsc35_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz) +{ + dsc_enc_caps->dsc_version = 0x21; /* v1.2 - DP spec defined it in reverse order and we kept it */ 
+ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 = 1; + + dsc_enc_caps->lb_bit_depth = 13; + dsc_enc_caps->is_block_pred_supported = true; + + dsc_enc_caps->color_formats.bits.RGB = 1; + dsc_enc_caps->color_formats.bits.YCBCR_444 = 1; + dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1; + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0; + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1; + + dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; + dsc_enc_caps->color_depth.bits.COLOR_DEPTH_10_BPC = 1; + dsc_enc_caps->color_depth.bits.COLOR_DEPTH_12_BPC = 1; + + dsc_enc_caps->max_total_throughput_mps = max_dscclk_khz * 3 / 1000; + + dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ + dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ +} From 45de10d2d9366e261e29a16a9e543adb98550521 Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Mon, 20 Oct 2025 15:16:07 +0200 Subject: [PATCH 20/67] drm/amd/display: Change lock descriptor values [Why] Review of usage scenarios requires dc_lock_descriptor modification. [How] Replace STATE/LINK/STREAM/PLANE with GLOBAL/STREAM/LINK, where the first means all streams to be locked. 
Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dominik Kaszewski Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 162 +++++++++++------------ drivers/gpu/drm/amd/display/dc/dc.h | 5 +- 2 files changed, 80 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3999b5835066..819c08b59d4e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2670,41 +2670,42 @@ static struct surface_update_descriptor get_plane_info_update_type(const struct if (!u->plane_info) return update_type; - elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_PLANE); + // `plane_info` present means at least `STREAM` lock is required + elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); if (u->plane_info->color_space != u->surface->color_space) { update_flags->bits.color_space_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); } if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) { update_flags->bits.horizontal_mirror_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); } if (u->plane_info->rotation != u->surface->rotation) { update_flags->bits.rotation_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (u->plane_info->format != u->surface->format) { update_flags->bits.pixel_format_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (u->plane_info->stereo_format != 
u->surface->stereo_format) { update_flags->bits.stereo_format_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) { update_flags->bits.per_pixel_alpha_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); } if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) { update_flags->bits.global_alpha_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); } if (u->plane_info->dcc.enable != u->surface->dcc.enable @@ -2716,7 +2717,7 @@ static struct surface_update_descriptor get_plane_info_update_type(const struct * recalculate stutter period. */ update_flags->bits.dcc_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (resource_pixel_format_to_bpp(u->plane_info->format) != @@ -2725,34 +2726,34 @@ static struct surface_update_descriptor get_plane_info_update_type(const struct * and DML calculation */ update_flags->bits.bpp_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (u->plane_info->plane_size.surface_pitch != u->surface->plane_size.surface_pitch || u->plane_info->plane_size.chroma_pitch != u->surface->plane_size.chroma_pitch) { update_flags->bits.plane_size_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); } const struct dc_tiling_info *tiling = &u->plane_info->tiling_info; if 
(memcmp(tiling, &u->surface->tiling_info, sizeof(*tiling)) != 0) { update_flags->bits.swizzle_change = 1; - elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); switch (tiling->gfxversion) { case DcGfxVersion9: case DcGfxVersion10: case DcGfxVersion11: if (tiling->gfx9.swizzle != DC_SW_LINEAR) { - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); update_flags->bits.bandwidth_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } break; case DcGfxAddr3: if (tiling->gfx_addr3.swizzle != DC_ADDR3_SW_LINEAR) { - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); update_flags->bits.bandwidth_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } break; case DcGfxVersion7: @@ -2777,7 +2778,8 @@ static struct surface_update_descriptor get_scaling_info_update_type( if (!u->scaling_info) return update_type; - elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_PLANE); + // `scaling_info` present means at least `STREAM` lock is required + elevate_update_type(&update_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); if (u->scaling_info->src_rect.width != u->surface->src_rect.width || u->scaling_info->src_rect.height != u->surface->src_rect.height @@ -2788,6 +2790,7 @@ static struct surface_update_descriptor get_scaling_info_update_type( || u->scaling_info->scaling_quality.integer_scaling != u->surface->scaling_quality.integer_scaling) { update_flags->bits.scaling_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); if (u->scaling_info->src_rect.width > u->surface->src_rect.width || u->scaling_info->src_rect.height > u->surface->src_rect.height) @@ -2813,17 +2816,10 @@ static struct surface_update_descriptor get_scaling_info_update_type( || u->scaling_info->clip_rect.x != u->surface->clip_rect.x || 
u->scaling_info->clip_rect.y != u->surface->clip_rect.y || u->scaling_info->dst_rect.x != u->surface->dst_rect.x - || u->scaling_info->dst_rect.y != u->surface->dst_rect.y) + || u->scaling_info->dst_rect.y != u->surface->dst_rect.y) { + elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); update_flags->bits.position_change = 1; - - /* process every update flag before returning */ - if (update_flags->bits.clock_change - || update_flags->bits.bandwidth_change - || update_flags->bits.scaling_change) - elevate_update_type(&update_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); - - if (update_flags->bits.position_change) - elevate_update_type(&update_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + } return update_type; } @@ -2837,7 +2833,7 @@ static struct surface_update_descriptor det_surface_update( if (u->surface->force_full_update) { update_flags->raw = 0xFFFFFFFF; - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); return overall_type; } @@ -2852,76 +2848,69 @@ static struct surface_update_descriptor det_surface_update( if (u->flip_addr) { update_flags->bits.addr_update = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + if (u->flip_addr->address.tmz_surface != u->surface->address.tmz_surface) { update_flags->bits.tmz_changed = 1; - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } } - if (u->in_transfer_func) + if (u->in_transfer_func) { update_flags->bits.in_transfer_func_change = 1; - - if (u->input_csc_color_matrix) - update_flags->bits.input_csc_change = 1; - - if (u->coeff_reduction_factor) - update_flags->bits.coeff_reduction_change = 1; - - if (u->gamut_remap_matrix) - update_flags->bits.gamut_remap_change = 1; - - if (u->blend_tf) - update_flags->bits.gamma_change = 1; - - if (u->gamma) 
{ - enum surface_pixel_format format = SURFACE_PIXEL_FORMAT_GRPH_BEGIN; - - if (u->plane_info) - format = u->plane_info->format; - else - format = u->surface->format; - - if (dce_use_lut(format)) - update_flags->bits.gamma_change = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); } - if (u->lut3d_func || u->func_shaper) + if (u->input_csc_color_matrix) { + update_flags->bits.input_csc_change = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + } + + if (u->coeff_reduction_factor) { + update_flags->bits.coeff_reduction_change = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + } + + if (u->gamut_remap_matrix) { + update_flags->bits.gamut_remap_change = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + } + + if (u->blend_tf || (u->gamma && dce_use_lut(u->plane_info ? u->plane_info->format : u->surface->format))) { + update_flags->bits.gamma_change = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + } + + if (u->lut3d_func || u->func_shaper) { update_flags->bits.lut_3d = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + } if (u->hdr_mult.value) if (u->hdr_mult.value != u->surface->hdr_mult.value) { - update_flags->bits.hdr_mult = 1; // TODO: Should be fast? - elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + update_flags->bits.hdr_mult = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); } if (u->sdr_white_level_nits) if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) { - update_flags->bits.sdr_white_level_nits = 1; // TODO: Should be fast? 
- elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + update_flags->bits.sdr_white_level_nits = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (u->cm2_params) { - if ((u->cm2_params->component_settings.shaper_3dlut_setting - != u->surface->mcm_shaper_3dlut_setting) - || (u->cm2_params->component_settings.lut1d_enable - != u->surface->mcm_lut1d_enable)) + if (u->cm2_params->component_settings.shaper_3dlut_setting != u->surface->mcm_shaper_3dlut_setting + || u->cm2_params->component_settings.lut1d_enable != u->surface->mcm_lut1d_enable + || u->cm2_params->cm2_luts.lut3d_data.lut3d_src != u->surface->mcm_luts.lut3d_data.lut3d_src) { update_flags->bits.mcm_transfer_function_enable_change = 1; - if (u->cm2_params->cm2_luts.lut3d_data.lut3d_src - != u->surface->mcm_luts.lut3d_data.lut3d_src) - update_flags->bits.mcm_transfer_function_enable_change = 1; - } - if (update_flags->bits.in_transfer_func_change) { - // TODO: Fast? - elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); + } } if (update_flags->bits.lut_3d && u->surface->mcm_luts.lut3d_data.lut3d_src != DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); - } - if (update_flags->bits.mcm_transfer_function_enable_change) { - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (check_config->enable_legacy_fast_update && @@ -2929,7 +2918,7 @@ static struct surface_update_descriptor det_surface_update( update_flags->bits.gamut_remap_change || update_flags->bits.input_csc_change || update_flags->bits.coeff_reduction_change)) { - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, 
LOCK_DESCRIPTOR_GLOBAL); } return overall_type; } @@ -2966,19 +2955,19 @@ static struct surface_update_descriptor check_update_surfaces_for_stream( struct surface_update_descriptor overall_type = { UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_NONE }; if (stream_update && stream_update->pending_test_pattern) { - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } if (stream_update && stream_update->hw_cursor_req) { - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); } /* some stream updates require passive update */ if (stream_update) { - union stream_update_flags *su_flags = &stream_update->stream->update_flags; - elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + union stream_update_flags *su_flags = &stream_update->stream->update_flags; + if ((stream_update->src.height != 0 && stream_update->src.width != 0) || (stream_update->dst.height != 0 && stream_update->dst.width != 0) || stream_update->integer_scaling_update) @@ -2990,8 +2979,10 @@ static struct surface_update_descriptor check_update_surfaces_for_stream( if (stream_update->abm_level) su_flags->bits.abm_level = 1; - if (stream_update->dpms_off) + if (stream_update->dpms_off) { su_flags->bits.dpms_off = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL | LOCK_DESCRIPTOR_LINK); + } if (stream_update->gamut_remap) su_flags->bits.gamut_remap = 1; @@ -3019,17 +3010,20 @@ static struct surface_update_descriptor check_update_surfaces_for_stream( if (stream_update->output_color_space) su_flags->bits.out_csc = 1; - if (su_flags->raw != 0) - elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_STATE); + // TODO: Make each elevation explicit, as to not override fast stream in crct_timing_adjust + if (su_flags->raw) + 
elevate_update_type(&overall_type, UPDATE_TYPE_FULL, LOCK_DESCRIPTOR_GLOBAL); - if (stream_update->output_csc_transform) + // Non-global cases + if (stream_update->output_csc_transform) { su_flags->bits.out_csc = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + } - /* Output transfer function changes do not require bandwidth recalculation, - * so don't trigger a full update - */ - if (!check_config->enable_legacy_fast_update && stream_update->out_transfer_func) + if (!check_config->enable_legacy_fast_update && stream_update->out_transfer_func) { su_flags->bits.out_tf = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + } } for (int i = 0 ; i < surface_count; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6b9e510b1ed5..a29f7dee1bc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -469,10 +469,9 @@ enum surface_update_type { enum dc_lock_descriptor { LOCK_DESCRIPTOR_NONE = 0x0, - LOCK_DESCRIPTOR_STATE = 0x1, + LOCK_DESCRIPTOR_STREAM = 0x1, LOCK_DESCRIPTOR_LINK = 0x2, - LOCK_DESCRIPTOR_STREAM = 0x4, - LOCK_DESCRIPTOR_PLANE = 0x8, + LOCK_DESCRIPTOR_GLOBAL = 0x4, }; struct surface_update_descriptor { From f3f48d6ce5089b183fdea025a8a7798a220168f6 Mon Sep 17 00:00:00 2001 From: Chuntao Tso Date: Fri, 31 Oct 2025 10:02:51 +0800 Subject: [PATCH 21/67] drm/amd/display: To support Replay frame skip mode [Why & How] The change is to optimize the Replay power saving by reducing the refresh rate with frame skipping mode Reviewed-by: Robin Chen Signed-off-by: Chuntao Tso Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_replay.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_types.h | 6 ++++ .../gpu/drm/amd/display/dc/dce/dmub_replay.c | 7 +++-- .../gpu/drm/amd/display/dc/dce/dmub_replay.h | 5 ++-- .../gpu/drm/amd/display/dc/inc/link_service.h | 4 +-- 
.../link/protocols/link_edp_panel_control.c | 17 +++++++---- .../link/protocols/link_edp_panel_control.h | 4 +-- .../amd/display/modules/power/power_helpers.c | 30 +++++++++++++++++++ .../amd/display/modules/power/power_helpers.h | 5 ++++ 9 files changed, 65 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c index 80704d709e44..da94e3544b65 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c @@ -162,7 +162,7 @@ bool amdgpu_dm_replay_enable(struct dc_stream_state *stream, bool wait) if (link) { link->dc->link_srv->edp_setup_replay(link, stream); - link->dc->link_srv->edp_set_coasting_vtotal(link, stream->timing.v_total); + link->dc->link_srv->edp_set_coasting_vtotal(link, stream->timing.v_total, 0); DRM_DEBUG_DRIVER("Enabling replay...\n"); link->dc->link_srv->edp_set_replay_allow_active(link, &replay_active, wait, false, NULL); return true; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index ea6b71c43d2c..0495e6cfcca0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1184,6 +1184,10 @@ struct replay_settings { uint32_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; /* Defer Update Coasting vtotal table */ uint32_t defer_update_coasting_vtotal_table[PR_COASTING_TYPE_NUM]; + /* Skip frame number table */ + uint32_t frame_skip_number_table[PR_COASTING_TYPE_NUM]; + /* Defer skip frame number table */ + uint32_t defer_frame_skip_number_table[PR_COASTING_TYPE_NUM]; /* Maximum link off frame count */ uint32_t link_off_frame_count; /* Replay pseudo vtotal for low refresh rate*/ @@ -1192,6 +1196,8 @@ struct replay_settings { uint16_t last_pseudo_vtotal; /* Replay desync error */ uint32_t replay_desync_error_fail_count; + /* The frame skip number dal send to DMUB */ + uint16_t 
frame_skip_number; }; /* To split out "global" and "per-panel" config settings. diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index f9542edff14b..fd8244c94687 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -213,7 +213,8 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, */ static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub, uint32_t coasting_vtotal, - uint8_t panel_inst) + uint8_t panel_inst, + uint16_t frame_skip_number) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; @@ -227,6 +228,7 @@ static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub, pCmd->header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data); pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16; + pCmd->replay_set_coasting_vtotal_data.frame_skip_number = frame_skip_number; dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -283,7 +285,7 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst, * Set REPLAY power optimization flags and coasting vtotal. 
*/ static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub, - unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal) + unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal, uint16_t frame_skip_number) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; @@ -301,6 +303,7 @@ static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dm pCmd->replay_set_power_opt_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16; + pCmd->replay_set_coasting_vtotal_data.frame_skip_number = frame_skip_number; dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index e6346c0ffc0e..07c79739a980 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -27,11 +27,12 @@ struct dmub_replay_funcs { void (*replay_send_cmd)(struct dmub_replay *dmub, enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element); void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint32_t coasting_vtotal, - uint8_t panel_inst); + uint8_t panel_inst, uint16_t frame_skip_number); void (*replay_residency)(struct dmub_replay *dmub, uint8_t panel_inst, uint32_t *residency, const bool is_start, const enum pr_residency_mode mode); void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub, - unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal); + unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal, + uint16_t frame_skip_number); }; struct dmub_replay *dmub_replay_create(struct dc_context *ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_service.h b/drivers/gpu/drm/amd/display/dc/inc/link_service.h index 
1e34e84160aa..6f94e48a24d1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_service.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_service.h @@ -292,12 +292,12 @@ struct link_service { enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_data); bool (*edp_set_coasting_vtotal)( - struct dc_link *link, uint32_t coasting_vtotal); + struct dc_link *link, uint32_t coasting_vtotal, uint16_t frame_skip_number); bool (*edp_replay_residency)(const struct dc_link *link, unsigned int *residency, const bool is_start, const enum pr_residency_mode mode); bool (*edp_set_replay_power_opt_and_coasting_vtotal)(struct dc_link *link, - const unsigned int *power_opts, uint32_t coasting_vtotal); + const unsigned int *power_opts, uint32_t coasting_vtotal, uint16_t frame_skip_number); bool (*edp_wait_for_t12)(struct dc_link *link); bool (*edp_is_ilr_optimization_required)(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 5e806edbb9f6..9391c75a30e5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -1110,7 +1110,7 @@ bool edp_send_replay_cmd(struct dc_link *link, return true; } -bool edp_set_coasting_vtotal(struct dc_link *link, uint32_t coasting_vtotal) +bool edp_set_coasting_vtotal(struct dc_link *link, uint32_t coasting_vtotal, uint16_t frame_skip_number) { struct dc *dc = link->ctx->dc; struct dmub_replay *replay = dc->res_pool->replay; @@ -1122,9 +1122,11 @@ bool edp_set_coasting_vtotal(struct dc_link *link, uint32_t coasting_vtotal) if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) return false; - if (coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) { - replay->funcs->replay_set_coasting_vtotal(replay, coasting_vtotal, panel_inst); + if (coasting_vtotal && 
(link->replay_settings.coasting_vtotal != coasting_vtotal || + link->replay_settings.frame_skip_number != frame_skip_number)) { + replay->funcs->replay_set_coasting_vtotal(replay, coasting_vtotal, panel_inst, frame_skip_number); link->replay_settings.coasting_vtotal = coasting_vtotal; + link->replay_settings.frame_skip_number = frame_skip_number; } return true; @@ -1152,7 +1154,7 @@ bool edp_replay_residency(const struct dc_link *link, } bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link, - const unsigned int *power_opts, uint32_t coasting_vtotal) + const unsigned int *power_opts, uint32_t coasting_vtotal, uint16_t frame_skip_number) { struct dc *dc = link->ctx->dc; struct dmub_replay *replay = dc->res_pool->replay; @@ -1163,13 +1165,16 @@ bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link, /* Only both power and coasting vtotal changed, this func could return true */ if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts && - coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) { + (coasting_vtotal && + (link->replay_settings.coasting_vtotal != coasting_vtotal || + link->replay_settings.frame_skip_number != frame_skip_number))) { if (link->replay_settings.replay_feature_enabled && replay->funcs->replay_set_power_opt_and_coasting_vtotal) { replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay, - *power_opts, panel_inst, coasting_vtotal); + *power_opts, panel_inst, coasting_vtotal, frame_skip_number); link->replay_settings.replay_power_opt_active = *power_opts; link->replay_settings.coasting_vtotal = coasting_vtotal; + link->replay_settings.frame_skip_number = frame_skip_number; } else return false; } else diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 62a6344e613e..dd79c7cd2828 100644 --- 
a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -59,12 +59,12 @@ bool edp_setup_replay(struct dc_link *link, bool edp_send_replay_cmd(struct dc_link *link, enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_data); -bool edp_set_coasting_vtotal(struct dc_link *link, uint32_t coasting_vtotal); +bool edp_set_coasting_vtotal(struct dc_link *link, uint32_t coasting_vtotal, uint16_t frame_skip_number); bool edp_replay_residency(const struct dc_link *link, unsigned int *residency, const bool is_start, const enum pr_residency_mode mode); bool edp_get_replay_state(const struct dc_link *link, uint64_t *state); bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link, - const unsigned int *power_opts, uint32_t coasting_vtotal); + const unsigned int *power_opts, uint32_t coasting_vtotal, uint16_t frame_skip_number); bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 29ccd3532d13..88b5b716a084 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -975,6 +975,34 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link, return true; } +void set_replay_frame_skip_number(struct dc_link *link, + enum replay_coasting_vtotal_type type, + uint32_t coasting_vtotal_refresh_rate_mhz, + uint32_t flicker_free_refresh_rate_mhz, + bool is_defer) +{ + uint32_t *frame_skip_number_array = NULL; + uint32_t frame_skip_number = 0; + + if (link == NULL || flicker_free_refresh_rate_mhz == 0 || coasting_vtotal_refresh_rate_mhz == 0) + return; + + if (is_defer) + frame_skip_number_array = link->replay_settings.defer_frame_skip_number_table; + else + 
frame_skip_number_array = link->replay_settings.frame_skip_number_table; + + if (frame_skip_number_array == NULL) + return; + + frame_skip_number = coasting_vtotal_refresh_rate_mhz / flicker_free_refresh_rate_mhz; + + if (frame_skip_number >= 1) + frame_skip_number_array[type] = frame_skip_number - 1; + else + frame_skip_number_array[type] = 0; +} + void set_replay_defer_update_coasting_vtotal(struct dc_link *link, enum replay_coasting_vtotal_type type, uint32_t vtotal) @@ -987,6 +1015,8 @@ void update_replay_coasting_vtotal_from_defer(struct dc_link *link, { link->replay_settings.coasting_vtotal_table[type] = link->replay_settings.defer_update_coasting_vtotal_table[type]; + link->replay_settings.frame_skip_number_table[type] = + link->replay_settings.defer_frame_skip_number_table[type]; } void set_replay_coasting_vtotal(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index 391209a3bf29..87d31d9dce5a 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -60,6 +60,11 @@ void set_replay_coasting_vtotal(struct dc_link *link, void set_replay_defer_update_coasting_vtotal(struct dc_link *link, enum replay_coasting_vtotal_type type, uint32_t vtotal); +void set_replay_frame_skip_number(struct dc_link *link, + enum replay_coasting_vtotal_type type, + uint32_t coasting_vtotal_refresh_rate_Mhz, + uint32_t flicker_free_refresh_rate_Mhz, + bool is_defer); void update_replay_coasting_vtotal_from_defer(struct dc_link *link, enum replay_coasting_vtotal_type type); void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); From b6fffcc4530a83598c672641b55d527082c454f8 Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Fri, 31 Oct 2025 09:35:58 +0100 Subject: [PATCH 22/67] drm/amd/display: Revert in_transfer_func_change to MED [Why] Last commit accidentally 
changed handling of in_transfer_func_change from MED to FAST. [How] * Revert the line. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dominik Kaszewski Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 819c08b59d4e..5764f15c16b7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2857,7 +2857,7 @@ static struct surface_update_descriptor det_surface_update( } if (u->in_transfer_func) { update_flags->bits.in_transfer_func_change = 1; - elevate_update_type(&overall_type, UPDATE_TYPE_FAST, LOCK_DESCRIPTOR_STREAM); + elevate_update_type(&overall_type, UPDATE_TYPE_MED, LOCK_DESCRIPTOR_STREAM); } if (u->input_csc_color_matrix) { From 401f8f33adc7240baa72fa0fb68ea0552094ede9 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 31 Oct 2025 15:25:47 -0400 Subject: [PATCH 23/67] drm/amd/display: dynamically clock gate before and after prefetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] An invalidation request arriving during prefetch can potentially hang the system if dynamic clock gating is enabled and memory power requests are disabled. [How] • Disable clock gating and enable memory power requests for the duration of the prefetch. • Turn on clock gating and disable memory power requests again after prefetch is complete. Limit the scope for DCN35 and DCN42 only. 
Reviewed-by: Nicholas Kazlauskas Signed-off-by: Leo Chen Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- .../display/dc/hubbub/dcn31/dcn31_hubbub.c | 7 +-- .../display/dc/hubbub/dcn35/dcn35_hubbub.c | 52 ++++++++++++++++++- .../display/dc/hubbub/dcn35/dcn35_hubbub.h | 1 + .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 2 + 4 files changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index d1aaa58b7db3..5a03758e3de6 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -933,8 +933,8 @@ int hubbub31_init_dchub_sys_ctx(struct hubbub *hubbub, dcn20_vmid_setup(&hubbub2->vmid[15], &phys_config); } - - dcn21_dchvm_init(hubbub); + if (hubbub->funcs->dchvm_init) + hubbub->funcs->dchvm_init(hubbub); return NUM_VMID; } @@ -1071,7 +1071,8 @@ static const struct hubbub_funcs hubbub31_funcs = { .program_compbuf_size = dcn31_program_compbuf_size, .init_crb = dcn31_init_crb, .hubbub_read_state = hubbub2_read_state, - .hubbub_read_reg_state = hubbub3_read_reg_state + .hubbub_read_reg_state = hubbub3_read_reg_state, + .dchvm_init = dcn21_dchvm_init }; void hubbub31_construct(struct dcn20_hubbub *hubbub31, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 1b7746a6549a..43ba399f4822 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -549,6 +549,55 @@ void hubbub35_init(struct hubbub *hubbub) memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate)); } +void dcn35_dchvm_init(struct hubbub *hubbub) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + uint32_t riommu_active; + int i; + + //Init DCHVM block + REG_UPDATE(DCHVM_CTRL0, 
HOSTVM_INIT_REQ, 1); + + //Poll until RIOMMU_ACTIVE = 1 + for (i = 0; i < 100; i++) { + REG_GET(DCHVM_RIOMMU_STAT0, RIOMMU_ACTIVE, &riommu_active); + + if (riommu_active) + break; + else + udelay(5); + } + + if (riommu_active) { + // Disable gating and memory power requests + REG_UPDATE(DCHVM_MEM_CTRL, HVM_GPUVMRET_PWR_REQ_DIS, 1); + REG_UPDATE_4(DCHVM_CLK_CTRL, + HVM_DISPCLK_R_GATE_DIS, 1, + HVM_DISPCLK_G_GATE_DIS, 1, + HVM_DCFCLK_R_GATE_DIS, 1, + HVM_DCFCLK_G_GATE_DIS, 1); + + //Reflect the power status of DCHUBBUB + REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_POWERSTATUS, 1); + + //Start rIOMMU prefetching + REG_UPDATE(DCHVM_RIOMMU_CTRL0, HOSTVM_PREFETCH_REQ, 1); + + //Poll until HOSTVM_PREFETCH_DONE = 1 + REG_WAIT(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, 1, 5, 100); + + //Enable memory power requests + REG_UPDATE(DCHVM_MEM_CTRL, HVM_GPUVMRET_PWR_REQ_DIS, 0); + // Enable dynamic clock gating + REG_UPDATE_4(DCHVM_CLK_CTRL, + HVM_DISPCLK_R_GATE_DIS, 0, + HVM_DISPCLK_G_GATE_DIS, 0, + HVM_DCFCLK_R_GATE_DIS, 0, + HVM_DCFCLK_G_GATE_DIS, 0); + hubbub->riommu_active = true; + } +} + /*static void hubbub35_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel) @@ -589,7 +638,8 @@ static const struct hubbub_funcs hubbub35_funcs = { .hubbub_read_state = hubbub2_read_state, .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow, .dchubbub_init = hubbub35_init, - .hubbub_read_reg_state = hubbub3_read_reg_state + .hubbub_read_reg_state = hubbub3_read_reg_state, + .dchvm_init = dcn35_dchvm_init }; void hubbub35_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h index 23fecf88556c..9f65fff1bd4d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h @@ -168,4 +168,5 @@ void dcn35_program_compbuf_size(struct hubbub *hubbub, unsigned int 
compbuf_size_kb, bool safe_to_increase); void dcn35_init_crb(struct hubbub *hubbub); void hubbub35_init(struct hubbub *hubbub); +void dcn35_dchvm_init(struct hubbub *hubbub); #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index dafc8490efb5..1ddfa30411c8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -252,6 +252,8 @@ struct hubbub_funcs { void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); + void (*dchvm_init)(struct hubbub *hubbub); + struct hubbub_perfmon_funcs { void (*reset)(struct hubbub *hubbub); void (*start_measuring_max_memory_latency_ns)( From 45c5fb0be343503fc972dc2696b3897786e96cbb Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Tue, 4 Nov 2025 16:40:53 -0500 Subject: [PATCH 24/67] drm/amd/display: Refactor HDCP Status Log Format Add missing part for drm/amd/display: fw locality check refactors Reviewed-by: Aurabindo Pillai Signed-off-by: Wenjing Liu Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c index 409a7d0e70fa..5cb979c2cf8c 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c @@ -126,7 +126,6 @@ void mod_hdcp_log_ddc_trace(struct mod_hdcp *hdcp) } #define CASE_FORMAT(entry) case entry: return #entry; - char *mod_hdcp_status_to_str(int32_t status) { switch (status) { From c7ade7cbca6ec0afa14dfdb97f0d5806a7446bee Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 31 Oct 
2025 19:00:47 -0400 Subject: [PATCH 25/67] drm/amd/display: [FW Promotion] Release 0.1.35.0 Summary for changes in firmware: * Use panel_inst instead of otg_inst when getting fw state * Contrast strength improves when HDR desktop mode * Ensure pipes have no outstanding HUBP requests prior to IPS RCG entry * Check for vm request and vm idle status in IPS1/2 entry sequence Reviewed-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index e956722209ba..377a449fbf8f 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -491,7 +491,13 @@ union replay_debug_flags { */ uint32_t debug_log_enabled : 1; - uint32_t reserved : 17; + /** + * 0x8000 (bit 15) + * @enable_sub_feature_visual_confirm: Enable Sub Feature Visual Confirm + */ + uint32_t enable_sub_feature_visual_confirm : 1; + + uint32_t reserved : 16; } bitfields; uint32_t u32All; @@ -4363,6 +4369,7 @@ enum dmub_cmd_replay_general_subtype { REPLAY_GENERAL_CMD_DISABLED_DESYNC_ERROR_DETECTION, REPLAY_GENERAL_CMD_UPDATE_ERROR_STATUS, REPLAY_GENERAL_CMD_SET_LOW_RR_ACTIVATE, + REPLAY_GENERAL_CMD_VIDEO_CONFERENCING, }; struct dmub_alpm_auxless_data { From 454b0ed8c0db6f107662ce4784caf2659b2f3700 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 31 Oct 2025 20:07:35 -0500 Subject: [PATCH 26/67] drm/amd/display: Promote DC to 3.2.358 Summary: * Enable VRR when unsynced with the stream * Refactor DSC cap calculation for dcn35 * Add debug log for power feature * Fix fill latency issue * Do not initialize LSDMA if it is not supported by DMU Reviewed-by: Aurabindo Pillai Signed-off-by: Taimur Hassan Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- 
drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a29f7dee1bc8..1db7eb8f9a5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -63,7 +63,7 @@ struct dcn_dsc_reg_state; struct dcn_optc_reg_state; struct dcn_dccg_reg_state; -#define DC_VER "3.2.357" +#define DC_VER "3.2.358" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC From 73c8c29baac7f0c7e703d92eba009008cbb5228e Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Fri, 7 Nov 2025 13:07:13 -0500 Subject: [PATCH 27/67] drm/amd/amdgpu: Ensure isp_kernel_buffer_alloc() creates a new BO When the BO pointer provided to amdgpu_bo_create_kernel() points to non-NULL, amdgpu_bo_create_kernel() takes it as a hint to pin that address rather than allocate a new BO. This functionality is never desired for allocating ISP buffers. A new BO should always be created when isp_kernel_buffer_alloc() is called, per the description for isp_kernel_buffer_alloc(). Ensure this by zeroing *bo right before the amdgpu_bo_create_kernel() call. 
Fixes: 55d42f616976 ("drm/amd/amdgpu: Add helper functions for isp buffers") Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Pratap Nirujogi Signed-off-by: Sultan Alsawaf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 9cddbf50442a..37270c4dab8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -280,6 +280,8 @@ int isp_kernel_buffer_alloc(struct device *dev, u64 size, if (ret) return ret; + /* Ensure *bo is NULL so a new BO will be created */ + *bo = NULL; ret = amdgpu_bo_create_kernel(adev, size, ISP_MC_ADDR_ALIGN, From 20161e3b6bd97c2df24da83a1b68f7b17e1d2927 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 7 Nov 2025 19:36:18 +0530 Subject: [PATCH 28/67] drm/amd/display: Fix annotations for connector poll/detect parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the missing @aconnector, @connector, and @force descriptions: @aconnector – This is the DM (Display Manager) connector. It gives access to the DRM connector, the DC link, and hotplug/poll state. The code uses it to check the link, update the sink, and manage connector state changes. @connector – This is the main DRM connector given by the DRM core. Inside the detect function, it is converted to amdgpu_dm_connector so we can run DC link detection, either light or full. @force – This flag tells the function whether to run a full detect again. If false, we avoid heavy DAC load detect steps to prevent flicker. If true, we force a re-detect even when we normally skip it. 
Fixes the below with gcc W=1: function param 'aconnector' not described in 'amdgpu_dm_connector_poll' function param 'force' not described in 'amdgpu_dm_connector_poll' function param 'connector' not described in 'amdgpu_dm_connector_detect' function param 'force' not described in 'amdgpu_dm_connector_detect' Cc: Aurabindo Pillai Cc: Roman Li Cc: Harry Wentland Cc: Tom Chung Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 18f1cf16ec18..e6728fd12eeb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7241,10 +7241,18 @@ create_stream_for_sink(struct drm_connector *connector, } /** - * amdgpu_dm_connector_poll() - Poll a connector to see if it's connected to a display + * amdgpu_dm_connector_poll - Poll a connector to see if it's connected to a display + * @aconnector: DM connector to poll (owns @base drm_connector and @dc_link) + * @force: if true, force polling even when DAC load detection was used * - * Used for connectors that don't support HPD (hotplug detection) - * to periodically checked whether the connector is connected to a display. + * Used for connectors that don't support HPD (hotplug detection) to + * periodically check whether the connector is connected to a display. + * + * When connection was determined via DAC load detection, we avoid + * re-running it on normal polls to prevent visible glitches, unless + * @force is set. + * + * Return: The probed connector status (connected/disconnected/unknown). 
*/ static enum drm_connector_status amdgpu_dm_connector_poll(struct amdgpu_dm_connector *aconnector, bool force) @@ -7312,6 +7320,14 @@ amdgpu_dm_connector_poll(struct amdgpu_dm_connector *aconnector, bool force) * 1. This interface is NOT called in context of HPD irq. * 2. This interface *is called* in context of user-mode ioctl. Which * makes it a bad place for *any* MST-related activity. + * + * @connector: The DRM connector we are checking. We convert it to + * amdgpu_dm_connector so we can read the DC link and state. + * @force: If true, do a full detect again. This is used even when + * a lighter check would normally be used to avoid flicker. + * + * Return: The connector status (connected, disconnected, or unknown). + * */ static enum drm_connector_status amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) From fc936aa76eea2610ca387d420f50be2f5ed8823c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 7 Nov 2025 19:56:15 +0530 Subject: [PATCH 29/67] drm/amd/display: Add kdoc params/returns in dc/link detection helpers The link detection helpers in dc/link/link_detection.c were missing kdoc annotations for parameters and return values. 
Fixes the below with gcc W=1: ...link_detection.c:872 parameter 'edid_header' not described ...link_detection.c:890 parameter 'link' not described ...link_detection.c:914 parameter 'link' not described ...link_detection.c:1355 parameter 'link' not described ...link_detection.c:1355 parameter 'type' not described Cc: Aurabindo Pillai Cc: Roman Li Cc: Harry Wentland Cc: Tom Chung Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/link_detection.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 5d287874c125..d163360a2bf6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -868,6 +868,11 @@ static void verify_link_capability(struct dc_link *link, struct dc_sink *sink, * Evaluates an 8-byte EDID header to check if it's good enough * for the purpose of determining whether a display is connected * without reading the full EDID. + * + * @edid_header: The first 8 bytes of the EDID read from DDC. + * + * Return: true if the header looks valid (>= 6 of 8 bytes match the + * expected 00/FF pattern), false otherwise. */ static bool link_detect_evaluate_edid_header(uint8_t edid_header[8]) { @@ -886,6 +891,11 @@ static bool link_detect_evaluate_edid_header(uint8_t edid_header[8]) * Detect whether a display is connected to DDC without reading full EDID. * Reads only the EDID header (the first 8 bytes of EDID) from DDC and * evaluates whether that matches. + * + * @link: DC link whose DDC/I2C is probed for the EDID header. + * + * Return: true if the EDID header was read and passes validation, + * false otherwise. 
*/ static bool link_detect_ddc_probe(struct dc_link *link) { @@ -910,6 +920,11 @@ static bool link_detect_ddc_probe(struct dc_link *link) * Load detection can be used to detect the presence of an * analog display when we can't read DDC. This causes a visible * visual glitch so it should be used sparingly. + * + * @link: DC link to test using the DAC load-detect path. + * + * Return: true if the VBIOS load-detect call reports OK, false + * otherwise. */ static bool link_detect_dac_load_detect(struct dc_link *link) { @@ -1351,6 +1366,14 @@ static bool detect_link_and_local_sink(struct dc_link *link, /** * link_detect_analog() - Determines if an analog sink is connected. + * + * @link: DC link to evaluate (must support analog signalling). + * @type: Updated with the detected connection type: + * dc_connection_single (analog via DDC), + * dc_connection_dac_load (via load-detect), + * or dc_connection_none. + * + * Return: true if detection completed. */ static bool link_detect_analog(struct dc_link *link, enum dc_connection_type *type) { From efa6bffae52bc8d32b6ae66cd83151d6a31fcf0b Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Sun, 19 Oct 2025 19:03:51 +0800 Subject: [PATCH 30/67] drm/amd/pm: Update pmfw headers for smu_v13_0_12 Update pmfw headers for smu_v13_0_12 to include ppt1 messages and static parameters Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h | 7 ++++++- .../gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h index fa43d2e229a0..dd30d96e1ca2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h @@ -189,7 +189,7 @@ typedef enum { SVI_MAX_TEMP_ENTRIES, // 13 } SVI_TEMP_e; -#define 
SMU_METRICS_TABLE_VERSION 0x14 +#define SMU_METRICS_TABLE_VERSION 0x15 #define SMU_SYSTEM_METRICS_TABLE_VERSION 0x1 @@ -367,6 +367,11 @@ typedef struct { //Node Power Limit uint32_t MaxNodePowerLimit; + + // PPT1 Configuration + uint32_t PPT1Max; + uint32_t PPT1Min; + uint32_t PPT1Default; } StaticMetricsTable_t; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h index fe1b3ac50a75..d09b6ae9827e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h @@ -117,7 +117,9 @@ #define PPSMC_MSG_GetSystemMetricsTable 0x5C #define PPSMC_MSG_GetSystemMetricsVersion 0x5D #define PPSMC_MSG_ResetVCN 0x5E -#define PPSMC_Message_Count 0x5F +#define PPSMC_MSG_SetFastPptLimit 0x5F +#define PPSMC_MSG_GetFastPptLimit 0x60 +#define PPSMC_Message_Count 0x61 //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 From d88edb246045d3b488f00d0854764cce829c1824 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Sun, 19 Oct 2025 23:22:22 +0800 Subject: [PATCH 31/67] drm/amd/pm: Add ppt1 support for smu_v13_0_12 Add support to configure and retrieve ppt1 limit for smu_v13_0_12 v2: Add update_caps function and update ppt1 cap based on max ppt1 value, optimize the return values (Lijo) v3: Add Null ptr check, return not supported in case of invalid level/type (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 8 ++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 80 ++++++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h | 4 + 4 files changed, 92 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 3a3930ef7ed9..9b71a8afdd35 100644 --- 
a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -286,7 +286,9 @@ __SMU_DUMMY_MAP(SetTimestamp), \ __SMU_DUMMY_MAP(GetTimestamp), \ __SMU_DUMMY_MAP(GetBadPageIpid), \ - __SMU_DUMMY_MAP(EraseRasTable), + __SMU_DUMMY_MAP(EraseRasTable), \ + __SMU_DUMMY_MAP(SetFastPptLimit), \ + __SMU_DUMMY_MAP(GetFastPptLimit), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index fc580800609c..9e635f733fbf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -148,6 +148,8 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(GetTimestamp, PPSMC_MSG_GetTimestamp, 0), MSG_MAP(GetBadPageIpid, PPSMC_MSG_GetBadPageIpIdLoHi, 0), MSG_MAP(EraseRasTable, PPSMC_MSG_EraseRasTable, 0), + MSG_MAP(SetFastPptLimit, PPSMC_MSG_SetFastPptLimit, 1), + MSG_MAP(GetFastPptLimit, PPSMC_MSG_GetFastPptLimit, 1), }; int smu_v13_0_12_tables_init(struct smu_context *smu) @@ -354,6 +356,12 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) if (smu_v13_0_6_cap_supported(smu, SMU_CAP(NPM_METRICS))) pptable->MaxNodePowerLimit = SMUQ10_ROUND(static_metrics->MaxNodePowerLimit); + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT)) && + static_metrics->PPT1Max) { + pptable->PPT1Max = static_metrics->PPT1Max; + pptable->PPT1Min = static_metrics->PPT1Min; + pptable->PPT1Default = static_metrics->PPT1Default; + } smu_v13_0_12_init_xgmi_data(smu, static_metrics); pptable->Init = true; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index dd8c7b98ce7e..f9e04df7e472 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -856,6 +856,17 @@ int 
smu_v13_0_6_get_static_metrics_table(struct smu_context *smu) return 0; } +static void smu_v13_0_6_update_caps(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct PPTable_t *pptable = + (struct PPTable_t *)smu_table->driver_pptable; + + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT)) && + !pptable->PPT1Max) + smu_v13_0_6_cap_clear(smu, SMU_CAP(FAST_PPT)); +} + static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; @@ -872,8 +883,12 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) uint8_t max_width; if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && - smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) - return smu_v13_0_12_setup_driver_pptable(smu); + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { + ret = smu_v13_0_12_setup_driver_pptable(smu); + if (ret) + return ret; + goto out; + } /* Store one-time values in driver PPTable */ if (!pptable->Init) { @@ -953,7 +968,8 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) smu_v13_0_6_fill_static_metrics_table(smu, static_metrics); } } - +out: + smu_v13_0_6_update_caps(smu); return 0; } @@ -1887,9 +1903,66 @@ static int smu_v13_0_6_set_power_limit(struct smu_context *smu, enum smu_ppt_limit_type limit_type, uint32_t limit) { + struct smu_table_context *smu_table = &smu->smu_table; + struct PPTable_t *pptable = + (struct PPTable_t *)smu_table->driver_pptable; + int ret; + + if (limit_type == SMU_FAST_PPT_LIMIT) { + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT))) + return -EOPNOTSUPP; + if (limit > pptable->PPT1Max || limit < pptable->PPT1Min) { + dev_err(smu->adev->dev, + "New power limit (%d) should be between min %d max %d\n", + limit, pptable->PPT1Min, pptable->PPT1Max); + return -EINVAL; + } + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetFastPptLimit, + limit, NULL); + if (ret) + 
dev_err(smu->adev->dev, "Set fast PPT limit failed!\n"); + return ret; + } + return smu_v13_0_set_power_limit(smu, limit_type, limit); } +static int smu_v13_0_6_get_ppt_limit(struct smu_context *smu, + uint32_t *ppt_limit, + enum smu_ppt_limit_type type, + enum smu_ppt_limit_level level) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct PPTable_t *pptable = + (struct PPTable_t *)smu_table->driver_pptable; + int ret = 0; + + if (type == SMU_FAST_PPT_LIMIT) { + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT))) + return -EOPNOTSUPP; + switch (level) { + case SMU_PPT_LIMIT_MAX: + *ppt_limit = pptable->PPT1Max; + break; + case SMU_PPT_LIMIT_CURRENT: + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetFastPptLimit, ppt_limit); + if (ret) + dev_err(smu->adev->dev, "Get fast PPT limit failed!\n"); + break; + case SMU_PPT_LIMIT_DEFAULT: + *ppt_limit = pptable->PPT1Default; + break; + case SMU_PPT_LIMIT_MIN: + *ppt_limit = pptable->PPT1Min; + break; + default: + return -EOPNOTSUPP; + } + return ret; + } + return -EOPNOTSUPP; +} + static int smu_v13_0_6_irq_process(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -3959,6 +4032,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .get_enabled_mask = smu_v13_0_6_get_enabled_mask, .feature_is_enabled = smu_cmn_feature_is_enabled, .set_power_limit = smu_v13_0_6_set_power_limit, + .get_ppt_limit = smu_v13_0_6_get_ppt_limit, .set_xgmi_pstate = smu_v13_0_set_xgmi_pstate, .register_irq_handler = smu_v13_0_6_register_irq_handler, .enable_thermal_alert = smu_v13_0_enable_thermal_alert, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index 367102cdbf09..6cbdd7c5ded9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -50,6 +50,9 @@ struct PPTable_t { uint32_t MinLclkDpmRange; uint64_t PublicSerialNumber_AID; uint32_t 
MaxNodePowerLimit; + uint32_t PPT1Max; + uint32_t PPT1Min; + uint32_t PPT1Default; bool Init; }; @@ -73,6 +76,7 @@ enum smu_v13_0_6_caps { SMU_CAP(TEMP_METRICS), SMU_CAP(NPM_METRICS), SMU_CAP(RAS_EEPROM), + SMU_CAP(FAST_PPT), SMU_CAP(ALL), }; From 12c958d1db36bcb87467e6a6273f5a56429a0d02 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 20 Oct 2025 03:26:01 +0800 Subject: [PATCH 32/67] drm/amd/pm: Expose ppt1 limit for gc_v9_5_0 Expose power2_cap hwmon node for retrieving and configuring ppt1 limit on supported boards for gc_v9_5_0 v2: Remove version check (Lijo) v3: Remove power2_average (Lijo) v4: Put back power2_average, will be removed separately (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 12 ++++++++---- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 40ffaced74fd..c0e497e6b2f6 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3372,7 +3372,9 @@ static ssize_t amdgpu_hwmon_show_power_label(struct device *dev, to_sensor_dev_attr(attr)->index == PP_PWR_TYPE_FAST ? "fastPPT" : "slowPPT"); else - return sysfs_emit(buf, "PPT\n"); + return sysfs_emit(buf, "%s\n", + to_sensor_dev_attr(attr)->index == PP_PWR_TYPE_FAST ? 
+ "PPT1" : "PPT"); } static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, @@ -3825,13 +3827,15 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; /* only Vangogh has fast PPT limit and power labels */ - if (!(gc_ver == IP_VERSION(10, 3, 1)) && - (attr == &sensor_dev_attr_power2_average.dev_attr.attr || + if ((attr == &sensor_dev_attr_power2_average.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_min.dev_attr.attr || attr == &sensor_dev_attr_power2_cap.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr || - attr == &sensor_dev_attr_power2_label.dev_attr.attr)) + attr == &sensor_dev_attr_power2_label.dev_attr.attr) && + (amdgpu_dpm_get_power_limit(adev, &tmp, + PP_PWR_LIMIT_MAX, + PP_PWR_TYPE_FAST) == -EOPNOTSUPP)) return 0; return effective_mode; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 14351ec70701..4a2c3ad0c38a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2941,6 +2941,8 @@ int smu_get_power_limit(void *handle, if (limit_type != SMU_DEFAULT_PPT_LIMIT) { if (smu->ppt_funcs->get_ppt_limit) ret = smu->ppt_funcs->get_ppt_limit(smu, limit, limit_type, limit_level); + else + return -EOPNOTSUPP; } else { switch (limit_level) { case SMU_PPT_LIMIT_CURRENT: From af2e61d61b127300f48609bf06e69643aab1c5ef Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 20 Oct 2025 04:01:10 +0800 Subject: [PATCH 33/67] drm/amd/pm: Enable ppt1 caps for smu_v13_0_12 Enable ppt1 caps to fetch and configure ppt1 for smu_v13_0_12 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f9e04df7e472..44e1cd821eec 
100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -356,6 +356,9 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) if (fw_ver > 0x04560900) smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); + if (fw_ver >= 0x04560D00) + smu_v13_0_6_cap_set(smu, SMU_CAP(FAST_PPT)); + if (fw_ver >= 0x04560700) { if (fw_ver >= 0x04560900) { smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); From 9dc8d07ce08364b53bab48271beda9c3fc7c085b Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Fri, 7 Nov 2025 00:56:32 +0800 Subject: [PATCH 34/67] drm/amd/pm: Remove power2_average node SOC power consumption is reported by power1_average. power2_cap_default/min/max only represent second level limits and don't represent a different type of power or power consumption by a subsection of the SOC. Therefore power2_average does not serve any purpose and hence removing power2_average sysfs node Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index c0e497e6b2f6..84ad3ee17907 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3579,7 +3579,6 @@ static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_m static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0); static SENSOR_DEVICE_ATTR(power1_cap_default, S_IRUGO, amdgpu_hwmon_show_power_cap_default, NULL, 0); static SENSOR_DEVICE_ATTR(power1_label, S_IRUGO, amdgpu_hwmon_show_power_label, NULL, 0); -static SENSOR_DEVICE_ATTR(power2_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 1); static SENSOR_DEVICE_ATTR(power2_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 1); static SENSOR_DEVICE_ATTR(power2_cap_min, S_IRUGO, 
amdgpu_hwmon_show_power_cap_min, NULL, 1); static SENSOR_DEVICE_ATTR(power2_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 1); @@ -3628,7 +3627,6 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_power1_cap.dev_attr.attr, &sensor_dev_attr_power1_cap_default.dev_attr.attr, &sensor_dev_attr_power1_label.dev_attr.attr, - &sensor_dev_attr_power2_average.dev_attr.attr, &sensor_dev_attr_power2_cap_max.dev_attr.attr, &sensor_dev_attr_power2_cap_min.dev_attr.attr, &sensor_dev_attr_power2_cap.dev_attr.attr, @@ -3827,8 +3825,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; /* only Vangogh has fast PPT limit and power labels */ - if ((attr == &sensor_dev_attr_power2_average.dev_attr.attr || - attr == &sensor_dev_attr_power2_cap_max.dev_attr.attr || + if ((attr == &sensor_dev_attr_power2_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_min.dev_attr.attr || attr == &sensor_dev_attr_power2_cap.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr || From 547985579932c1de13f57f8bcf62cd9361b9d3d3 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Tue, 7 Oct 2025 13:17:51 +0530 Subject: [PATCH 35/67] drm/amdgpu/jpeg: Add parse_cs for JPEG5_0_1 enable parse_cs callback for JPEG5_0_1. 
Signed-off-by: Sathishkumar S Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index baf097d2e1ac..ab0bf880d3d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -878,6 +878,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + From 46f2029fe1dbdbb2ff3d6a566b32002660d3944b Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Fri, 7 Nov 2025 19:19:08 +0800 Subject: [PATCH 36/67] drm/amdgpu: resume MES scheduling after user queue hang detection and recovery This patch ensures the Micro-Engine Scheduler (MES) is properly resumed after detecting and recovering from a user queue hang condition. Key changes: 1. Track when a hung user queue is detected using found_hung_queue flag 2. Call amdgpu_mes_resume() to restart MES scheduling after completing the hang recovery process 3. This complements the existing recovery steps (fence force completion and device wedging) by ensuring the scheduler can process new work Without this resume call, the MES scheduler may remain in a paused state even after the hung queue has been handled, preventing newly submitted work from being processed and leading to system stalls. 
Acked-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index b1ee9473d628..64cae89357b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -208,6 +208,7 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, unsigned int hung_db_num = 0; unsigned long queue_id; u32 db_array[8]; + bool found_hung_queue = false; int r, i; if (db_array_size > 8) { @@ -232,6 +233,7 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, for (i = 0; i < hung_db_num; i++) { if (queue->doorbell_index == db_array[i]) { queue->state = AMDGPU_USERQ_STATE_HUNG; + found_hung_queue = true; atomic_inc(&adev->gpu_reset_counter); amdgpu_userq_fence_driver_force_completion(queue); drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL); @@ -241,6 +243,11 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, } } + if (found_hung_queue) { + /* Resume scheduling after hang recovery */ + r = amdgpu_mes_resume(adev); + } + return r; } From 3b832487a9e5b69d1f00ec8f02dc18d0299573ed Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 6 Nov 2025 09:47:47 +0800 Subject: [PATCH 37/67] drm/amdgpu/userqueue: Remove duplicate amdgpu_reset.h header ./drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c: amdgpu_reset.h is included more than once. 
Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=26930 Signed-off-by: Jiapeng Chong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 836a14ef0052..9a969175900e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -32,7 +32,6 @@ #include "amdgpu_vm.h" #include "amdgpu_userq.h" #include "amdgpu_hmm.h" -#include "amdgpu_reset.h" #include "amdgpu_userq_fence.h" u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) From 3a4132e6cb4879764d886f11b9fd225197fb48dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:34 +0100 Subject: [PATCH 38/67] drm/amdgpu/gmc6: Place gart at low address range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using a best-fit algorithm to determine which part of the VMID 0 address space to use for GART, always use the low address range. A subsequent commit will use this to map the VCPU BO in GART for the VCE1 IP block. Split this into a separate patch to make it easier to bisect, in case there are any errors in the future. 
Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index f6ad7911f1e6..499dfd78092d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -213,7 +213,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); - amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW); } static void gmc_v6_0_mc_program(struct amdgpu_device *adev) From 237d623ae6594df547fbb003953b5d2dd57e6124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:35 +0100 Subject: [PATCH 39/67] drm/amdgpu/gart: Add helper to bind VRAM pages (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binds pages that are located in VRAM to the GART page table. Useful when a kernel BO is located in VRAM but needs to be accessed from the GART address space, for example to give a kernel BO a 32-bit address when GART is placed in LOW address space.
v2: - Refactor function to be more reusable Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 36 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 3 ++ 2 files changed, 39 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 83f3b94ed975..d2237ce9da70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -367,6 +367,42 @@ void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, drm_dev_exit(idx); } +/** + * amdgpu_gart_map_vram_range - map VRAM pages into the GART page table + * + * @adev: amdgpu_device pointer + * @pa: physical address of the first page to be mapped + * @start_page: first page to map in the GART aperture + * @num_pages: number of pages to be mapped + * @flags: page table entry flags + * @dst: CPU address of the GART table + * + * Binds a BO that is allocated in VRAM to the GART page table + * (all ASICs). + * + * Useful when a kernel BO is located in VRAM but + * needs to be accessed from the GART address space. + */ +void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa, + uint64_t start_page, uint64_t num_pages, + uint64_t flags, void *dst) +{ + u32 i, idx; + + /* The SYSTEM flag indicates the pages aren't in VRAM. 
*/ + WARN_ON_ONCE(flags & AMDGPU_PTE_SYSTEM); + + if (!drm_dev_enter(adev_to_drm(adev), &idx)) + return; + + for (i = 0; i < num_pages; ++i) { + amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr, + start_page + i, pa + AMDGPU_GPU_PAGE_SIZE * i, flags); + } + + drm_dev_exit(idx); +} + /** * amdgpu_gart_bind - bind pages into the gart page table * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 7cc980bf4725..d3118275ddae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -64,5 +64,8 @@ void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, void *dst); void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, dma_addr_t *dma_addr, uint64_t flags); +void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa, + uint64_t start_page, uint64_t num_pages, + uint64_t flags, void *dst); void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev); #endif From 2da2e952a785a49c5b3cc624bf00ab6c2d7607a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sun, 9 Nov 2025 19:26:40 +0100 Subject: [PATCH 40/67] drm/amdgpu: Use DC by default on SI dGPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that DC supports analog connectors, it has reached feature parity with the legacy non-DC display driver on SI dGPUs. Use the DC display driver by default on SI dGPUs, unless it is explicitly disabled using the amdgpu.dc=0 module parameter. DC brings proper support for DP/HDMI audio, DP MST, 10-bit colors, some HDR features, atomic modesetting, etc. Also clarify the comment about what is missing to have full DC support for CIK APUs. 
Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 86255c13fbb7..bfbf874a1000 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4249,24 +4249,13 @@ bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev, case CHIP_PITCAIRN: case CHIP_VERDE: case CHIP_OLAND: - /* - * We have systems in the wild with these ASICs that require - * LVDS and VGA support which is not supported with DC. - * - * Fallback to the non-DC driver here by default so as not to - * cause regressions. - */ -#if defined(CONFIG_DRM_AMD_DC_SI) - return amdgpu_dc > 0; -#else - return false; -#endif + return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI); case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: /* * We have systems in the wild with these ASICs that require - * VGA support which is not supported with DC. + * TRAVIS and NUTMEG support which is not supported with DC. * * Fallback to the non-DC driver here by default so as not to * cause regressions. From 15bd4958fe38e763bc17b607ba55155254a01f55 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 6 Nov 2025 10:17:06 -0500 Subject: [PATCH 41/67] drm/amdkfd: relax checks for over allocation of save area Over allocation of save area is not fatal, only under allocation is. ROCm has various components that independently claim authority over save area size. Unless KFD decides to claim single authority, relax size checks. 
Signed-off-by: Jonathan Kim Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a65c67cf56ff..f1e7583650c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -297,16 +297,16 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; } - if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { - pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + if (properties->ctx_save_restore_area_size < topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not sufficient for node cwsr size 0x%x\n", properties->ctx_save_restore_area_size, topo_dev->node_props.cwsr_size); err = -EINVAL; goto out_err_unreserve; } - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, @@ -352,8 +352,8 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope topo_dev = kfd_topology_device_by_id(pdd->dev->id); if (!topo_dev) return -EINVAL; - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); From 43a0ca334e0de9b1eba0dc25f0c901d0e5d02c2c 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:36 +0100 Subject: [PATCH 42/67] drm/amdgpu/ttm: Use GART helper to map VRAM pages (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the GART helper function introduced in the previous commit to map the VRAM pages of the transfer window to GART. No functional changes, just code cleanup. Split this into a separate commit to make it easier to bisect, in case there are problems in the future. Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 9777c5c9cb26..838a51b6098b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -188,7 +188,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct amdgpu_job *job; void *cpu_addr; uint64_t flags; - unsigned int i; int r; BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < @@ -255,16 +254,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT]; amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr); } else { - dma_addr_t dma_address; + u64 pa = mm_cur->start + adev->vm_manager.vram_base_offset; - dma_address = mm_cur->start; - dma_address += adev->vm_manager.vram_base_offset; - - for (i = 0; i < num_pages; ++i) { - amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address, - flags, cpu_addr); - dma_address += PAGE_SIZE; - } + amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr); } dma_fence_put(amdgpu_job_submit(job)); From f2e18c946532db7357f33339d0ec1531ebf49ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:37 +0100 Subject: [PATCH 43/67] 
drm/amdgpu/vce: Move firmware load to amdgpu_vce_early_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to load the VCE firmware at early_init. When the correct firmware is not found, return -ENOENT. This way, the driver initialization will complete even without VCE, and the GPU will be functional, albeit without video encoding capabilities. This is necessary because we are planning to add support for the VCE1, and AMD hasn't yet published the correct firmware for this version. So we need to anticipate that users will try to boot amdgpu on SI GPUs without the correct VCE1 firmware present on their system. Signed-off-by: Timur Kristóf Reviewed-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 121 +++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 + drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 5 + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 5 + drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 5 + 5 files changed, 91 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ce318f5de047..3cbdc76656ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -88,82 +88,87 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence); /** - * amdgpu_vce_sw_init - allocate memory, load vce firmware + * amdgpu_vce_firmware_name() - determine the firmware file name for VCE * * @adev: amdgpu_device pointer - * @size: size for the new BO * - * First step to get VCE online, allocate memory and load the firmware + * Each chip that has VCE IP may need a different firmware. + * This function returns the name of the VCE firmware file + * appropriate for the current chip.
*/ -int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) +static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev) { - const char *fw_name; - const struct common_firmware_header *hdr; - unsigned int ucode_version, version_major, version_minor, binary_id; - int i, r; - switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_BONAIRE: - fw_name = FIRMWARE_BONAIRE; - break; + return FIRMWARE_BONAIRE; case CHIP_KAVERI: - fw_name = FIRMWARE_KAVERI; - break; + return FIRMWARE_KAVERI; case CHIP_KABINI: - fw_name = FIRMWARE_KABINI; - break; + return FIRMWARE_KABINI; case CHIP_HAWAII: - fw_name = FIRMWARE_HAWAII; - break; + return FIRMWARE_HAWAII; case CHIP_MULLINS: - fw_name = FIRMWARE_MULLINS; - break; + return FIRMWARE_MULLINS; #endif case CHIP_TONGA: - fw_name = FIRMWARE_TONGA; - break; + return FIRMWARE_TONGA; case CHIP_CARRIZO: - fw_name = FIRMWARE_CARRIZO; - break; + return FIRMWARE_CARRIZO; case CHIP_FIJI: - fw_name = FIRMWARE_FIJI; - break; + return FIRMWARE_FIJI; case CHIP_STONEY: - fw_name = FIRMWARE_STONEY; - break; + return FIRMWARE_STONEY; case CHIP_POLARIS10: - fw_name = FIRMWARE_POLARIS10; - break; + return FIRMWARE_POLARIS10; case CHIP_POLARIS11: - fw_name = FIRMWARE_POLARIS11; - break; + return FIRMWARE_POLARIS11; case CHIP_POLARIS12: - fw_name = FIRMWARE_POLARIS12; - break; + return FIRMWARE_POLARIS12; case CHIP_VEGAM: - fw_name = FIRMWARE_VEGAM; - break; + return FIRMWARE_VEGAM; case CHIP_VEGA10: - fw_name = FIRMWARE_VEGA10; - break; + return FIRMWARE_VEGA10; case CHIP_VEGA12: - fw_name = FIRMWARE_VEGA12; - break; + return FIRMWARE_VEGA12; case CHIP_VEGA20: - fw_name = FIRMWARE_VEGA20; - break; + return FIRMWARE_VEGA20; default: - return -EINVAL; + return NULL; } +} + +/** + * amdgpu_vce_early_init() - try to load VCE firmware + * + * @adev: amdgpu_device pointer + * + * Tries to load the VCE firmware. 
+ * + * When not found, returns ENOENT so that the driver can + * still load and initialize the rest of the IP blocks. + * The GPU can function just fine without VCE, they will just + * not support video encoding. + */ +int amdgpu_vce_early_init(struct amdgpu_device *adev) +{ + const char *fw_name = amdgpu_vce_firmware_name(adev); + const struct common_firmware_header *hdr; + unsigned int ucode_version, version_major, version_minor, binary_id; + int r; + + if (!fw_name) + return -ENOENT; r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { - dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", - fw_name); + dev_err(adev->dev, + "amdgpu_vce: Firmware \"%s\" not found or failed to validate (%d)\n", + fw_name, r); + amdgpu_ucode_release(&adev->vce.fw); - return r; + return -ENOENT; } hdr = (const struct common_firmware_header *)adev->vce.fw->data; @@ -172,11 +177,35 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) version_major = (ucode_version >> 20) & 0xfff; version_minor = (ucode_version >> 8) & 0xfff; binary_id = ucode_version & 0xff; - DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n", + dev_info(adev->dev, "Found VCE firmware Version: %d.%d Binary ID: %d\n", version_major, version_minor, binary_id); adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | (binary_id << 8)); + return 0; +} + +/** + * amdgpu_vce_sw_init() - allocate memory for VCE BO + * + * @adev: amdgpu_device pointer + * @size: size for the new BO + * + * First step to get VCE online: allocate memory for VCE BO. + * The VCE firmware binary is copied into the VCE BO later, + * in amdgpu_vce_resume. The VCE executes its code from the + * VCE BO and also uses the space in this BO for its stack and data. + * + * Ideally this BO should be placed in VRAM for optimal performance, + * although technically it also runs from system RAM (albeit slowly). 
+ */ +int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) +{ + int i, r; + + if (!adev->vce.fw) + return -ENOENT; + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 6e53f872d084..22acd7b35945 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -53,6 +53,7 @@ struct amdgpu_vce { unsigned num_rings; }; +int amdgpu_vce_early_init(struct amdgpu_device *adev); int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index bee3e904a6bc..8ea8a6193492 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -407,6 +407,11 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable, static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_vce_early_init(adev); + if (r) + return r; adev->vce.num_rings = 2; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 708123899c41..719e9643c43d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -399,6 +399,7 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev); @@ -407,6 +408,10 @@ static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block) (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) return -ENOENT; + r = amdgpu_vce_early_init(adev); + if (r) + 
return r; + adev->vce.num_rings = 3; vce_v3_0_set_ring_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 335bda64ff5b..2d64002bed61 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -410,6 +410,11 @@ static int vce_v4_0_stop(struct amdgpu_device *adev) static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_vce_early_init(adev); + if (r) + return r; if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */ adev->vce.num_rings = 1; From e583d31f44a78c8d9ed9901dca7645b4857414a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:38 +0100 Subject: [PATCH 44/67] drm/amdgpu/vce: Clear VCPU BO, don't unmap/unreserve (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VCPU BO doesn't only contain the VCE firmware but also other ranges that the VCE uses for its stack and data. Let's initialize this to zero to avoid having garbage in the VCPU BO. Additionally, don't unmap/unreserve the VCPU BO. The VCPU BO needs to stay at the same location before and after sleep/resume because the FW code is not relocatable once it's started. 
Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 3cbdc76656ab..5b5f44e040eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -314,40 +314,23 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) */ int amdgpu_vce_resume(struct amdgpu_device *adev) { - void *cpu_addr; const struct common_firmware_header *hdr; unsigned int offset; - int r, idx; + int idx; if (adev->vce.vcpu_bo == NULL) return -EINVAL; - r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); - if (r) { - dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); - return r; - } - - r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) VCE map failed\n", r); - return r; - } - hdr = (const struct common_firmware_header *)adev->vce.fw->data; offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(cpu_addr, adev->vce.fw->data + offset, + memset_io(adev->vce.cpu_addr, 0, amdgpu_bo_size(adev->vce.vcpu_bo)); + memcpy_toio(adev->vce.cpu_addr, adev->vce.fw->data + offset, adev->vce.fw->size - offset); drm_dev_exit(idx); } - amdgpu_bo_kunmap(adev->vce.vcpu_bo); - - amdgpu_bo_unreserve(adev->vce.vcpu_bo); - return 0; } From 1b8ed1168a78e21acac5ee8ba6de42e7ccc2360f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:39 +0100 Subject: [PATCH 45/67] drm/amdgpu/vce1: Clean up register definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sid.h header contained some VCE1 register definitions, but they were using byte offsets (probably copied from the old radeon driver). 
Move all of these to the proper VCE1 headers and ensure they are in dword offsets. Also add the register definitions that we need for the firmware validation mechanism in VCE1. Signed-off-by: Timur Kristóf Co-developed-by: Alexandre Demers Signed-off-by: Alexandre Demers Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sid.h | 40 ------------------- .../drm/amd/include/asic_reg/vce/vce_1_0_d.h | 5 +++ .../include/asic_reg/vce/vce_1_0_sh_mask.h | 10 +++++ 3 files changed, 15 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index cbd4f8951cfa..561462a8332e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -582,45 +582,6 @@ #define DMA_PACKET_NOP 0xf /* VCE */ -#define VCE_STATUS 0x20004 -#define VCE_VCPU_CNTL 0x20014 -#define VCE_CLK_EN (1 << 0) -#define VCE_VCPU_CACHE_OFFSET0 0x20024 -#define VCE_VCPU_CACHE_SIZE0 0x20028 -#define VCE_VCPU_CACHE_OFFSET1 0x2002c -#define VCE_VCPU_CACHE_SIZE1 0x20030 -#define VCE_VCPU_CACHE_OFFSET2 0x20034 -#define VCE_VCPU_CACHE_SIZE2 0x20038 -#define VCE_SOFT_RESET 0x20120 -#define VCE_ECPU_SOFT_RESET (1 << 0) -#define VCE_FME_SOFT_RESET (1 << 2) -#define VCE_RB_BASE_LO2 0x2016c -#define VCE_RB_BASE_HI2 0x20170 -#define VCE_RB_SIZE2 0x20174 -#define VCE_RB_RPTR2 0x20178 -#define VCE_RB_WPTR2 0x2017c -#define VCE_RB_BASE_LO 0x20180 -#define VCE_RB_BASE_HI 0x20184 -#define VCE_RB_SIZE 0x20188 -#define VCE_RB_RPTR 0x2018c -#define VCE_RB_WPTR 0x20190 -#define VCE_CLOCK_GATING_A 0x202f8 -#define VCE_CLOCK_GATING_B 0x202fc -#define VCE_UENC_CLOCK_GATING 0x205bc -#define VCE_UENC_REG_CLOCK_GATING 0x205c0 -#define VCE_FW_REG_STATUS 0x20e10 -# define VCE_FW_REG_STATUS_BUSY (1 << 0) -# define VCE_FW_REG_STATUS_PASS (1 << 3) -# define VCE_FW_REG_STATUS_DONE (1 << 11) -#define VCE_LMI_FW_START_KEYSEL 0x20e18 -#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20 -#define VCE_LMI_CTRL2 0x20e74 -#define VCE_LMI_CTRL 0x20e98 
/*
 * For every field, __SHIFT is the bit position of the lowest set bit of the
 * matching _MASK. BUSY is bit 0 (mask 0x1), so its shift is 0.
 */
#define VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK	0x00010000
#define VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_SHIFT	0x00000010
#define VCE_FW_REG_STATUS__BUSY_MASK	0x0000001
#define VCE_FW_REG_STATUS__BUSY__SHIFT	0x0000000
#define VCE_FW_REG_STATUS__PASS_MASK	0x0000008
#define VCE_FW_REG_STATUS__PASS__SHIFT	0x0000003
#define VCE_FW_REG_STATUS__DONE_MASK	0x0000800
#define VCE_FW_REG_STATUS__DONE__SHIFT	0x000000b
static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev) { switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_PITCAIRN: + case CHIP_TAHITI: + case CHIP_VERDE: + return FIRMWARE_VCE_V1_0; +#endif #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_BONAIRE: return FIRMWARE_BONAIRE; From d4a640d4b9f34aa9472c71986ef4b5a42dbe4f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:41 +0100 Subject: [PATCH 47/67] drm/amdgpu/vce1: Implement VCE1 IP block (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the necessary functionality to support the VCE1. This implementation is based on: - VCE2 code from amdgpu - VCE1 code from radeon (the old driver) - Some trial and error A subsequent commit will ensure correct mapping for the VCPU BO, which will make this actually work. v2: - Use memset_io more. - Use memcpy_toio more. - Remove __func__ from warnings. - Don't reserve and map the VCPU BO anymore. 
- Add empty line to multi-line comments Signed-off-by: Timur Kristóf Co-developed-by: Alexandre Demers Signed-off-by: Alexandre Demers Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 + drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 784 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/vce_v1_0.h | 32 + 4 files changed, 818 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/vce_v1_0.c create mode 100644 drivers/gpu/drm/amd/amdgpu/vce_v1_0.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index ebe08947c5a3..c88760fb52ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -78,7 +78,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o \ dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o \ - uvd_v3_1.o + uvd_v3_1.o vce_v1_0.o amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 22acd7b35945..050783802623 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -51,6 +51,7 @@ struct amdgpu_vce { struct drm_sched_entity entity; uint32_t srbm_soft_reset; unsigned num_rings; + uint32_t keyselect; }; int amdgpu_vce_early_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c new file mode 100644 index 000000000000..bf9f943852cb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -0,0 +1,784 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * Copyright 2025 Valve Corporation + * Copyright 2025 Alexandre Demers + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * Authors: Christian König + * Timur Kristóf + * Alexandre Demers + */ + +#include + +#include "amdgpu.h" +#include "amdgpu_vce.h" +#include "sid.h" +#include "vce_v1_0.h" +#include "vce/vce_1_0_d.h" +#include "vce/vce_1_0_sh_mask.h" +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" + +#define VCE_V1_0_FW_SIZE (256 * 1024) +#define VCE_V1_0_STACK_SIZE (64 * 1024) +#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) +#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 + +static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); +static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev); + +struct vce_v1_0_fw_signature { + int32_t offset; + uint32_t length; + int32_t number; + struct { + uint32_t chip_id; + uint32_t keyselect; + uint32_t nonce[4]; + uint32_t sigval[4]; + } val[8]; +}; + +/** + * vce_v1_0_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vce_v1_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->me == 0) + return RREG32(mmVCE_RB_RPTR); + else + return RREG32(mmVCE_RB_RPTR2); +} + +/** + * vce_v1_0_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vce_v1_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->me == 0) + return RREG32(mmVCE_RB_WPTR); + else + return RREG32(mmVCE_RB_WPTR2); +} + +/** + * vce_v1_0_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vce_v1_0_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->me == 0) + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); + else + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); +} + +static int vce_v1_0_lmi_clean(struct amdgpu_device *adev) +{ + 
int i, j; + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + if (RREG32(mmVCE_LMI_STATUS) & 0x337f) + return 0; + + mdelay(10); + } + } + + return -ETIMEDOUT; +} + +static int vce_v1_0_firmware_loaded(struct amdgpu_device *adev) +{ + int i, j; + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + if (RREG32(mmVCE_STATUS) & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK) + return 0; + mdelay(10); + } + + dev_err(adev->dev, "VCE not responding, trying to reset the ECPU\n"); + + WREG32_P(mmVCE_SOFT_RESET, + VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(mmVCE_SOFT_RESET, 0, + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + mdelay(10); + } + + return -ETIMEDOUT; +} + +static void vce_v1_0_init_cg(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32(mmVCE_CLOCK_GATING_A); + tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK; + WREG32(mmVCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(mmVCE_CLOCK_GATING_B); + tmp |= 0x1e; + tmp &= ~0xe100e1; + WREG32(mmVCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(mmVCE_UENC_CLOCK_GATING); + tmp &= ~0xff9ff000; + WREG32(mmVCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3ff; + WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp); +} + +/** + * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO + * + * @adev: amdgpu_device pointer + * + * The VCE1 firmware validation mechanism needs a firmware signature. + * This function finds the signature appropriate for the current + * ASIC and writes that into the VCPU BO. 
+ */ +static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) +{ + const struct common_firmware_header *hdr; + struct vce_v1_0_fw_signature *sign; + unsigned int ucode_offset; + uint32_t chip_id; + u32 *cpu_addr; + int i; + + hdr = (const struct common_firmware_header *)adev->vce.fw->data; + ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + cpu_addr = adev->vce.cpu_addr; + + sign = (void *)adev->vce.fw->data + ucode_offset; + + switch (adev->asic_type) { + case CHIP_TAHITI: + chip_id = 0x01000014; + break; + case CHIP_VERDE: + chip_id = 0x01000015; + break; + case CHIP_PITCAIRN: + chip_id = 0x01000016; + break; + default: + dev_err(adev->dev, "asic_type %#010x was not found!", adev->asic_type); + return -EINVAL; + } + + for (i = 0; i < le32_to_cpu(sign->number); ++i) { + if (le32_to_cpu(sign->val[i].chip_id) == chip_id) + break; + } + + if (i == le32_to_cpu(sign->number)) { + dev_err(adev->dev, "chip_id 0x%x for %s was not found in VCE firmware", + chip_id, amdgpu_asic_name[adev->asic_type]); + return -EINVAL; + } + + cpu_addr += (256 - 64) / 4; + memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16); + cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64); + + memset_io(&cpu_addr[5], 0, 44); + memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign)); + + cpu_addr += (le32_to_cpu(sign->length) + 64) / 4; + memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16); + + adev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect); + + return 0; +} + +static int vce_v1_0_wait_for_fw_validation(struct amdgpu_device *adev) +{ + int i; + + dev_dbg(adev->dev, "VCE keyselect: %d", adev->vce.keyselect); + WREG32(mmVCE_LMI_FW_START_KEYSEL, adev->vce.keyselect); + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK) + break; + } + + if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)) { + dev_err(adev->dev, "VCE FW validation timeout\n"); + return -ETIMEDOUT; + } + + if 
(!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__PASS_MASK)) { + dev_err(adev->dev, "VCE FW validation failed\n"); + return -EINVAL; + } + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK)) + break; + } + + if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK) { + dev_err(adev->dev, "VCE FW busy timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int vce_v1_0_mc_resume(struct amdgpu_device *adev) +{ + uint32_t offset; + uint32_t size; + + /* + * When the keyselect is already set, don't perturb VCE FW. + * Validation seems to always fail the second time. + */ + if (RREG32(mmVCE_LMI_FW_START_KEYSEL)) { + dev_dbg(adev->dev, "keyselect already set: 0x%x (on CPU: 0x%x)\n", + RREG32(mmVCE_LMI_FW_START_KEYSEL), adev->vce.keyselect); + + WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100); + return 0; + } + + WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16)); + WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); + WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); + WREG32(mmVCE_CLOCK_GATING_B, 0); + + WREG32_P(mmVCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4); + + WREG32(mmVCE_LMI_CTRL, 0x00398000); + + WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1); + WREG32(mmVCE_LMI_SWAP_CNTL, 0); + WREG32(mmVCE_LMI_SWAP_CNTL1, 0); + WREG32(mmVCE_LMI_VM_CTRL, 0); + + WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES); + + offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET; + size = VCE_V1_0_FW_SIZE; + WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_SIZE0, size); + + offset += size; + size = VCE_V1_0_STACK_SIZE; + WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_SIZE1, size); + + offset += size; + size = VCE_V1_0_DATA_SIZE; + WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_SIZE2, size); + + WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100); + + return vce_v1_0_wait_for_fw_validation(adev); +} + +/** + * vce_v1_0_is_idle() - Check idle 
status of VCE1 IP block + * + * @ip_block: amdgpu_ip_block pointer + * + * Check whether VCE is busy according to VCE_STATUS. + * Also check whether the SRBM thinks VCE is busy, although + * SRBM_STATUS.VCE_BUSY seems to be bogus because it + * appears to mirror the VCE_STATUS.VCPU_REPORT_FW_LOADED bit. + */ +static bool vce_v1_0_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + bool busy = + (RREG32(mmVCE_STATUS) & (VCE_STATUS__JOB_BUSY_MASK | VCE_STATUS__UENC_BUSY_MASK)) || + (RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK); + + return !busy; +} + +static int vce_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + unsigned int i; + + for (i = 0; i < adev->usec_timeout; i++) { + udelay(1); + if (vce_v1_0_is_idle(ip_block)) + return 0; + } + return -ETIMEDOUT; +} + +/** + * vce_v1_0_start - start VCE block + * + * @adev: amdgpu_device pointer + * + * Setup and start the VCE block + */ +static int vce_v1_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r; + + WREG32_P(mmVCE_STATUS, 1, ~1); + + r = vce_v1_0_mc_resume(adev); + if (r) + return r; + + ring = &adev->vce.ring[0]; + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vce.ring[1]; + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO2, lower_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); + + WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, + ~VCE_VCPU_CNTL__CLK_EN_MASK); + + WREG32_P(mmVCE_SOFT_RESET, + VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK | + VCE_SOFT_RESET__FME_SOFT_RESET_MASK, + 
~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK | + VCE_SOFT_RESET__FME_SOFT_RESET_MASK)); + + mdelay(100); + + WREG32_P(mmVCE_SOFT_RESET, 0, + ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK | + VCE_SOFT_RESET__FME_SOFT_RESET_MASK)); + + r = vce_v1_0_firmware_loaded(adev); + + /* Clear VCE_STATUS, otherwise SRBM thinks VCE1 is busy. */ + WREG32(mmVCE_STATUS, 0); + + if (r) { + dev_err(adev->dev, "VCE not responding, giving up\n"); + return r; + } + + return 0; +} + +static int vce_v1_0_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ip_block *ip_block; + int status; + int i; + + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE); + if (!ip_block) + return -EINVAL; + + if (vce_v1_0_lmi_clean(adev)) + dev_warn(adev->dev, "VCE not idle\n"); + + if (vce_v1_0_wait_for_idle(ip_block)) + dev_warn(adev->dev, "VCE busy: VCE_STATUS=0x%x, SRBM_STATUS2=0x%x\n", + RREG32(mmVCE_STATUS), RREG32(mmSRBM_STATUS2)); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8)); + + for (i = 0; i < 100; ++i) { + status = RREG32(mmVCE_LMI_STATUS); + if (status & 0x240) + break; + mdelay(1); + } + + WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK); + + WREG32_P(mmVCE_SOFT_RESET, + VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK | + VCE_SOFT_RESET__FME_SOFT_RESET_MASK, + ~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK | + VCE_SOFT_RESET__FME_SOFT_RESET_MASK)); + + WREG32(mmVCE_STATUS, 0); + + return 0; +} + +static void vce_v1_0_enable_mgcg(struct amdgpu_device *adev, bool enable) +{ + u32 tmp; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) { + tmp = RREG32(mmVCE_CLOCK_GATING_A); + tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK; + WREG32(mmVCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(mmVCE_UENC_CLOCK_GATING); + tmp &= ~0x1ff000; + tmp |= 0xff800000; + WREG32(mmVCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3ff; + WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp); + } else { + tmp = 
RREG32(mmVCE_CLOCK_GATING_A); + tmp &= ~VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK; + WREG32(mmVCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(mmVCE_UENC_CLOCK_GATING); + tmp |= 0x1ff000; + tmp &= ~0xff800000; + WREG32(mmVCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING); + tmp |= 0x3ff; + WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp); + } +} + +static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_vce_early_init(adev); + if (r) + return r; + + adev->vce.num_rings = 2; + + vce_v1_0_set_ring_funcs(adev); + vce_v1_0_set_irq_funcs(adev); + + return 0; +} + +static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int r, i; + + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq); + if (r) + return r; + + r = amdgpu_vce_sw_init(adev, VCE_V1_0_FW_SIZE + + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE); + if (r) + return r; + + r = amdgpu_vce_resume(adev); + if (r) + return r; + r = vce_v1_0_load_fw_signature(adev); + if (r) + return r; + + for (i = 0; i < adev->vce.num_rings; i++) { + enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i); + + ring = &adev->vce.ring[i]; + sprintf(ring->name, "vce%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0, + hw_prio, NULL); + if (r) + return r; + } + + return r; +} + +static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_vce_suspend(adev); + if (r) + return r; + + return amdgpu_vce_sw_fini(adev); +} + +/** + * vce_v1_0_hw_init - start and test VCE block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vce_v1_0_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vce(adev, true); + else + amdgpu_asic_set_vce_clocks(adev, 10000, 10000); + + for (i = 0; i < adev->vce.num_rings; i++) { + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); + if (r) + return r; + } + + dev_info(adev->dev, "VCE initialized successfully.\n"); + + return 0; +} + +static int vce_v1_0_hw_fini(struct amdgpu_ip_block *ip_block) +{ + int r; + + r = vce_v1_0_stop(ip_block->adev); + if (r) + return r; + + cancel_delayed_work_sync(&ip_block->adev->vce.idle_work); + return 0; +} + +static int vce_v1_0_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). 
+ */ + cancel_delayed_work_sync(&adev->vce.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_vce(adev, false); + } else { + amdgpu_asic_set_vce_clocks(adev, 0, 0); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); + } + + r = vce_v1_0_hw_fini(ip_block); + if (r) { + dev_err(adev->dev, "vce_v1_0_hw_fini() failed with error %i", r); + return r; + } + + return amdgpu_vce_suspend(adev); +} + +static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_vce_resume(adev); + if (r) + return r; + r = vce_v1_0_load_fw_signature(adev); + if (r) + return r; + + return vce_v1_0_hw_init(ip_block); +} + +static int vce_v1_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + uint32_t val = 0; + + if (state == AMDGPU_IRQ_STATE_ENABLE) + val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; + + WREG32_P(mmVCE_SYS_INT_EN, val, + ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + return 0; +} + +static int vce_v1_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + dev_dbg(adev->dev, "IH: VCE\n"); + switch (entry->src_data[0]) { + case 0: + case 1: + amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]); + break; + default: + dev_err(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static int vce_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + + vce_v1_0_init_cg(adev); + vce_v1_0_enable_mgcg(adev, state == AMD_CG_STATE_GATE); + + return 0; +} + +static int vce_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum 
amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + + /* + * This doesn't actually powergate the VCE block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + if (state == AMD_PG_STATE_GATE) + return vce_v1_0_stop(adev); + else + return vce_v1_0_start(adev); +} + +static const struct amd_ip_funcs vce_v1_0_ip_funcs = { + .name = "vce_v1_0", + .early_init = vce_v1_0_early_init, + .sw_init = vce_v1_0_sw_init, + .sw_fini = vce_v1_0_sw_fini, + .hw_init = vce_v1_0_hw_init, + .hw_fini = vce_v1_0_hw_fini, + .suspend = vce_v1_0_suspend, + .resume = vce_v1_0_resume, + .is_idle = vce_v1_0_is_idle, + .wait_for_idle = vce_v1_0_wait_for_idle, + .set_clockgating_state = vce_v1_0_set_clockgating_state, + .set_powergating_state = vce_v1_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs vce_v1_0_ring_funcs = { + .type = AMDGPU_RING_TYPE_VCE, + .align_mask = 0xf, + .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, + .no_user_fence = true, + .get_rptr = vce_v1_0_ring_get_rptr, + .get_wptr = vce_v1_0_ring_get_wptr, + .set_wptr = vce_v1_0_ring_set_wptr, + .parse_cs = amdgpu_vce_ring_parse_cs, + .emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */ + .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */ + .emit_ib = amdgpu_vce_ring_emit_ib, + .emit_fence = amdgpu_vce_ring_emit_fence, + .test_ring = amdgpu_vce_ring_test_ring, + .test_ib = amdgpu_vce_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vce_ring_begin_use, + .end_use = amdgpu_vce_ring_end_use, +}; + +static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vce.num_rings; i++) { + adev->vce.ring[i].funcs = &vce_v1_0_ring_funcs; + adev->vce.ring[i].me = i; + } +}; 
+ +static const struct amdgpu_irq_src_funcs vce_v1_0_irq_funcs = { + .set = vce_v1_0_set_interrupt_state, + .process = vce_v1_0_process_interrupt, +}; + +static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->vce.irq.num_types = 1; + adev->vce.irq.funcs = &vce_v1_0_irq_funcs; +}; + +const struct amdgpu_ip_block_version vce_v1_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &vce_v1_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h new file mode 100644 index 000000000000..206e7bec897f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * Copyright 2025 Valve Corporation + * Copyright 2025 Alexandre Demers + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __VCE_V1_0_H__ +#define __VCE_V1_0_H__ + +extern const struct amdgpu_ip_block_version vce_v1_0_ip_block; + +#endif From baf75a087c41eeb03c471099dc5d77e3b068c33b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 6 Nov 2025 18:44:29 +0530 Subject: [PATCH 48/67] drm/amdgpu: Check if AID is active before access Access XGMI registers only if AID is active. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1ede308a7c67..2f553af1d2a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -333,6 +333,10 @@ static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link } i = global_link_num / n; + + if (!(adev->aid_mask & BIT(i))) + return U32_MAX; + addr += adev->asic_funcs->encode_ext_smn_addressing(i); return RREG32_PCIE_EXT(addr); From 221cadb9c6bc2e179a717aac706dbbc9b3377acc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:42 +0100 Subject: [PATCH 49/67] drm/amdgpu/vce1: Ensure VCPU BO is in lower 32-bit address space (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on research and ideas by Alexandre and Christian. VCE1 actually executes its code from the VCPU BO. Due to various hardware limitations, the VCE1 requires the VCPU BO to be in the low 32 bit address range. However, VRAM is typically mapped at the high address range, which means the VCPU can't access VRAM through the FB aperture. To solve this, we write a few page table entries to map the VCPU BO in the GART address range. And we make sure that the GART is located at the low address range. That way the VCE1 can access the VCPU BO. v2: - Adjust to v2 of the GART helper commit. 
- Add empty line to multi-line comment. v3: - Instead of relying on gmc_v6 to set the GART space before GTT, add a new function amdgpu_vce_required_gart_pages() which is called from amdgpu_gtt_mgr_init() directly. Signed-off-by: Timur Kristóf Co-developed-by: Alexandre Demers Signed-off-by: Alexandre Demers Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 18 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 + drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 55 +++++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 0760e70402ec..895c1e4c6747 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -284,6 +284,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size); start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; + start += amdgpu_vce_required_gart_pages(adev); size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index eb4a15db2ef2..a7d8f1ce6ac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -450,6 +450,24 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) } } +/** + * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE + * + * @adev: amdgpu_device pointer + * + * Returns how many GART pages we need before GTT for the VCE IP block. + * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details. + * For VCE2+, this is not needed so return zero. 
+ */ +u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev) +{ + /* VCE IP block not added yet, so can't use amdgpu_ip_version */ + if (adev->family == AMDGPU_FAMILY_SI) + return 512; + + return 0; +} + /** * amdgpu_vce_get_create_msg - generate a VCE create msg * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 050783802623..1c3464ce5037 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -61,6 +61,7 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); +u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index bf9f943852cb..9ae424618556 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -34,6 +34,7 @@ #include "amdgpu.h" #include "amdgpu_vce.h" +#include "amdgpu_gart.h" #include "sid.h" #include "vce_v1_0.h" #include "vce/vce_1_0_d.h" @@ -46,6 +47,11 @@ #define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 +#define VCE_V1_0_GART_PAGE_START \ + (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS) +#define VCE_V1_0_GART_ADDR_START \ + (VCE_V1_0_GART_PAGE_START * AMDGPU_GPU_PAGE_SIZE) + static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev); @@ -513,6 +519,49 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } +/** + * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the 
VCPU BO has a 32-bit address + * + * @adev: amdgpu_device pointer + * + * Due to various hardware limitations, the VCE1 requires + * the VCPU BO to be in the low 32 bit address range. + * Ensure that the VCPU BO has a 32-bit GPU address, + * or return an error code when that isn't possible. + * + * To accommodate that, we put GART to the LOW address range + * and reserve some GART pages where we map the VCPU BO, + * so that it gets a 32-bit address. + */ +static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) +{ + u64 gpu_addr = amdgpu_bo_gpu_offset(adev->vce.vcpu_bo); + u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo); + u64 max_vcpu_bo_addr = 0xffffffff - bo_size; + u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE; + u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo); + u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID; + + /* + * Check if the VCPU BO already has a 32-bit address. + * Eg. if MC is configured to put VRAM in the low address range. + */ + if (gpu_addr <= max_vcpu_bo_addr) + return 0; + + /* Check if we can map the VCPU BO in GART to a 32-bit address. 
*/ + if (adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START > max_vcpu_bo_addr) + return -EINVAL; + + amdgpu_gart_map_vram_range(adev, pa, VCE_V1_0_GART_PAGE_START, + num_pages, flags, adev->gart.ptr); + adev->vce.gpu_addr = adev->gmc.gart_start + VCE_V1_0_GART_ADDR_START; + if (adev->vce.gpu_addr > max_vcpu_bo_addr) + return -EINVAL; + + return 0; +} + static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -532,6 +581,9 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; r = vce_v1_0_load_fw_signature(adev); + if (r) + return r; + r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); if (r) return r; @@ -647,6 +699,9 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block) if (r) return r; r = vce_v1_0_load_fw_signature(adev); + if (r) + return r; + r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); if (r) return r; From 53cc70f8f143d62341b7653e829e6d2af04946dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:43 +0100 Subject: [PATCH 50/67] drm/amd/pm/si: Hook up VCE1 to SI DPM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On SI GPUs, the SMC needs to be aware of whether or not the VCE1 is used. The VCE1 is enabled/disabled through the DPM code. Also print VCE clocks in amdgpu_pm_info. 
Users can inspect the current power state using: cat /sys/kernel/debug/dri//amdgpu_pm_info Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 020e05c137e4..1f539cc65f41 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7046,13 +7046,20 @@ static void si_set_vce_clock(struct amdgpu_device *adev, if ((old_rps->evclk != new_rps->evclk) || (old_rps->ecclk != new_rps->ecclk)) { /* Turn the clocks on when encoding, off otherwise */ + dev_dbg(adev->dev, "set VCE clocks: %u, %u\n", new_rps->evclk, new_rps->ecclk); + if (new_rps->evclk || new_rps->ecclk) { - /* Place holder for future VCE1.0 porting to amdgpu - vce_v1_0_enable_mgcg(adev, false, false);*/ + amdgpu_asic_set_vce_clocks(adev, new_rps->evclk, new_rps->ecclk); + amdgpu_device_ip_set_clockgating_state( + adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state( + adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_UNGATE); } else { - /* Place holder for future VCE1.0 porting to amdgpu - vce_v1_0_enable_mgcg(adev, true, false); - amdgpu_asic_set_vce_clocks(adev, new_rps->evclk, new_rps->ecclk);*/ + amdgpu_device_ip_set_powergating_state( + adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state( + adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); + amdgpu_asic_set_vce_clocks(adev, 0, 0); } } } @@ -7574,6 +7581,7 @@ static void si_dpm_debugfs_print_current_performance_level(void *handle, } else { pl = &ps->performance_levels[current_index]; seq_printf(m, "uvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + seq_printf(m, "vce evclk: %d ecclk: %d\n", rps->evclk, rps->ecclk); seq_printf(m, "power level %d sclk: %u mclk: %u vddc: %u vddci: %u pcie 
gen: %u\n", current_index, pl->sclk, pl->mclk, pl->vddc, pl->vddci, pl->pcie_gen + 1); } From eabc71661f01c74e5da3182b268170626e8bd290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:44 +0100 Subject: [PATCH 51/67] drm/amdgpu/vce1: Enable VCE1 on Tahiti, Pitcairn, Cape Verde GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the VCE1 IP block to the SI GPUs that have it. Advertise the encoder capabilities corresponding to VCE1, so the userspace applications can detect and use it. Signed-off-by: Timur Kristóf Co-developed-by: Alexandre Demers Signed-off-by: Alexandre Demers Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index e0f139de7991..9d769222784c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -45,6 +45,7 @@ #include "dce_v6_0.h" #include "si.h" #include "uvd_v3_1.h" +#include "vce_v1_0.h" #include "uvd/uvd_4_0_d.h" @@ -921,8 +922,6 @@ static const u32 hainan_mgcg_cgcg_init[] = 0x3630, 0xfffffff0, 0x00000100, }; -/* XXX: update when we support VCE */ -#if 0 /* tahiti, pitcairn, verde */ static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] = { @@ -940,13 +939,7 @@ static const struct amdgpu_video_codecs tahiti_video_codecs_encode = .codec_count = ARRAY_SIZE(tahiti_video_codecs_encode_array), .codec_array = tahiti_video_codecs_encode_array, }; -#else -static const struct amdgpu_video_codecs tahiti_video_codecs_encode = -{ - .codec_count = 0, - .codec_array = NULL, -}; -#endif + /* oland and hainan don't support encode */ static const struct amdgpu_video_codecs hainan_video_codecs_encode = { @@ -2717,7 +2710,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) else amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block); 
amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); - /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ + amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); break; case CHIP_OLAND: amdgpu_device_ip_block_add(adev, &si_common_ip_block); @@ -2735,7 +2728,6 @@ int si_set_ip_blocks(struct amdgpu_device *adev) else amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); - /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; case CHIP_HAINAN: amdgpu_device_ip_block_add(adev, &si_common_ip_block); From baefc7cdac82e39fac31818ecc799a6b9b14a312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 7 Nov 2025 16:57:45 +0100 Subject: [PATCH 52/67] drm/amdgpu/vce1: Workaround PLL timeout on FirePro W9000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes the VCE PLL times out waiting for CTLACK/CTLACK2. When it happens, the VCE still works, but much slower. Observed on a Tahiti GPU, but not all: - FirePro W9000 has the issue - Radeon R9 280X not affected - Radeon HD 7990 not affected As a workaround, on the affected chip just don't put the VCE PLL in sleep mode. Leaving the VCE PLL in bypass mode or reset mode both work. Using bypass mode is simpler. Signed-off-by: Timur Kristóf Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d769222784c..f7288372ee61 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1918,6 +1918,14 @@ static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) ~VCEPLL_BYPASS_EN_MASK); if (!evclk || !ecclk) { + /* + * On some chips, the PLL takes way too long to get out of + * sleep mode, causing a timeout waiting on CTLACK/CTLACK2. + * Leave the PLL running in bypass mode. 
+ */ + if (adev->pdev->device == 0x6780) + return 0; + /* Keep the Bypass mode, put PLL to sleep */ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, ~VCEPLL_SLEEP_MASK); From 076470b9f6f8d9c7c8ca73a9f054942a686f9ba7 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 28 Oct 2025 14:37:07 -0400 Subject: [PATCH 53/67] drm/amdkfd: Fix GPU mappings for APU after prefetch Fix the following corner case: Consider a 2M huge page SVM allocation, followed by prefetch call for the first 4K page. The whole range is initially mapped with single PTE. After the prefetch, this range gets split to first page + rest of the pages. Currently, the first page mapping is not updated on MI300A (APU) since page hasn't migrated. However, after range split PTE mapping is not valid. Fix this by forcing page table update for the whole range when prefetch is called. Calling prefetch on APU doesn't improve performance. If anything, it deteriorates. However, functionality has to be supported. v2: Use apu_prefer_gtt as this issue doesn't apply to APUs with carveout VRAM v3: Simplify by setting the flag for all ASICs as it doesn't affect dGPU v4: Remove v2 and v3 changes.
Force update_mapping when range is split at a size that is not aligned to prange granularity Suggested-by: Philip Yang Signed-off-by: Harish Kasiviswanathan Reviewed-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c30dfb8ec236..97c2270f278f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3693,6 +3693,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping); /* TODO: unmap ranges from GPU that lost access */ } + update_mapping |= !p->xnack_enabled && !list_empty(&remap_list); + list_for_each_entry_safe(prange, next, &remove_list, update_list) { pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, From 43d08222adf70f6b3ff6ded8dd8e8e4ed3ceff05 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 6 Nov 2025 13:49:59 +0530 Subject: [PATCH 54/67] drm/amdgpu: Avoid xgmi register access On single GPU systems, avoid accesses to XGMI link registers. 
Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 2f553af1d2a3..10e42b0ff7a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -298,6 +298,9 @@ int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num) { int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 }; + if (adev->gmc.xgmi.num_physical_nodes <= 1) + return -EINVAL; + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): case IP_VERSION(6, 4, 1): @@ -346,6 +349,9 @@ int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num) { u32 xgmi_state_reg_val; + if (adev->gmc.xgmi.num_physical_nodes <= 1) + return -EINVAL; + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): case IP_VERSION(6, 4, 1): From 5c1a781af370c498b367f9b7ff9e3eb35328d4d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sun, 9 Nov 2025 16:41:04 +0100 Subject: [PATCH 55/67] drm/radeon: Refactor how SI and CIK support is determined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the determination into a separate function. Change radeon.si_support and radeon.cik_support so that their default value is -1 (default). This prepares the code for changing the default driver based on the chip. 
Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_drv.c | 78 ++++++++++++++++++----------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 26ad9adc5d8c..1dfe5482d48e 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -241,12 +241,12 @@ module_param_named(uvd, radeon_uvd, int, 0444); MODULE_PARM_DESC(vce, "vce enable/disable vce support (1 = enable, 0 = disable)"); module_param_named(vce, radeon_vce, int, 0444); -int radeon_si_support = 1; -MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); +int radeon_si_support = -1; +MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled, -1 = default)"); module_param_named(si_support, radeon_si_support, int, 0444); -int radeon_cik_support = 1; -MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); +int radeon_cik_support = -1; +MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled, -1 = default)"); module_param_named(cik_support, radeon_cik_support, int, 0444); static const struct pci_device_id pciidlist[] = { @@ -256,6 +256,50 @@ MODULE_DEVICE_TABLE(pci, pciidlist); static const struct drm_driver kms_driver; +static bool radeon_support_enabled(struct device *dev, + const enum radeon_family family) +{ + const char *gen; + int module_param = -1; + bool amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU); + bool support_by_default = true; + + switch (family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + gen = "SI"; + module_param = radeon_si_support; + amdgpu_support_built &= IS_ENABLED(CONFIG_DRM_AMDGPU_SI); + break; + + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + gen = "CIK"; + module_param = radeon_cik_support; + amdgpu_support_built &= 
IS_ENABLED(CONFIG_DRM_AMDGPU_CIK); + break; + + default: + /* All other chips are supported by radeon only */ + return true; + } + + if ((module_param == -1 && (support_by_default || !amdgpu_support_built)) || + module_param == 1) + return true; + + if (!module_param) + dev_info(dev, "%s support disabled by module param\n", gen); + + return false; +} + static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -271,30 +315,8 @@ static int radeon_pci_probe(struct pci_dev *pdev, flags = ent->driver_data; - if (!radeon_si_support) { - switch (flags & RADEON_FAMILY_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev, - "SI support disabled by module param\n"); - return -ENODEV; - } - } - if (!radeon_cik_support) { - switch (flags & RADEON_FAMILY_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev, - "CIK support disabled by module param\n"); - return -ENODEV; - } - } + if (!radeon_support_enabled(dev, flags & RADEON_FAMILY_MASK)) + return -ENODEV; if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; From 7fe9ad4011fa550081cd6675fb8251e9c5973c59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sun, 9 Nov 2025 16:41:05 +0100 Subject: [PATCH 56/67] drm/amdgpu: Refactor how SI and CIK support is determined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the determination into a separate function. Change amdgpu.si_support and amdgpu.cik_support so that their default value is -1 (default). This prepares the code for changing the default driver based on the chip. Also adjust the module param documentation. 
Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 151 ++++++++++++++---------- 1 file changed, 86 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ef23acaf5a2c..d0bd1a5a3c01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -618,39 +618,37 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); /** * DOC: si_support (int) - * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled, - * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available, - * otherwise using amdgpu driver. + * 1 = enabled, 0 = disabled, -1 = default + * + * SI (Southern Islands) are first generation GCN GPUs, supported by both + * drivers: radeon (old) and amdgpu (new). This parameter controls whether + * amdgpu should support SI. + * By default, SI chips are supported by radeon (except when radeon is not built). + * Only relevant when CONFIG_DRM_AMDGPU_SI is enabled to build SI support in amdgpu. + * See also radeon.si_support which should be disabled when amdgpu.si_support is + * enabled, and vice versa. */ +int amdgpu_si_support = -1; #ifdef CONFIG_DRM_AMDGPU_SI - -#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_si_support; -MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); -#else -int amdgpu_si_support = 1; -MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); -#endif - +MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled, -1 = default)"); module_param_named(si_support, amdgpu_si_support, int, 0444); #endif /** * DOC: cik_support (int) - * Set CIK support driver. This parameter works after set config CONFIG_DRM_AMDGPU_CIK. 
For CIK asic, when radeon driver is enabled, - * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available, - * otherwise using amdgpu driver. + * 1 = enabled, 0 = disabled, -1 = default + * + * CIK (Sea Islands) are second generation GCN GPUs, supported by both + * drivers: radeon (old) and amdgpu (new). This parameter controls whether + * amdgpu should support CIK. + * By default, CIK chips are supported by radeon (except when radeon is not built). + * Only relevant when CONFIG_DRM_AMDGPU_CIK is enabled to build CIK support in amdgpu. + * See also radeon.cik_support which should be disabled when amdgpu.cik_support is + * enabled, and vice versa. */ +int amdgpu_cik_support = -1; #ifdef CONFIG_DRM_AMDGPU_CIK - -#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_cik_support; -MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); -#else -int amdgpu_cik_support = 1; -MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); -#endif - +MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled, -1 = default)"); module_param_named(cik_support, amdgpu_cik_support, int, 0444); #endif @@ -2306,6 +2304,69 @@ static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long fl return flags; } +static bool amdgpu_support_enabled(struct device *dev, + const enum amd_asic_type family) +{ + const char *gen; + const char *param; + int module_param = -1; + bool radeon_support_built = IS_ENABLED(CONFIG_DRM_RADEON); + bool amdgpu_support_built = false; + bool support_by_default = false; + + switch (family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + gen = "SI"; + param = "si_support"; + module_param = amdgpu_si_support; + amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_SI); + break; + + case CHIP_BONAIRE: + case CHIP_HAWAII: + case 
CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + gen = "CIK"; + param = "cik_support"; + module_param = amdgpu_cik_support; + amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_CIK); + break; + + default: + /* All other chips are supported by amdgpu only */ + return true; + } + + if (!amdgpu_support_built) { + dev_info(dev, "amdgpu built without %s support\n", gen); + return false; + } + + if ((module_param == -1 && (support_by_default || !radeon_support_built)) || + module_param == 1) { + if (radeon_support_built) + dev_info(dev, "%s support provided by amdgpu.\n" + "Use radeon.%s=1 amdgpu.%s=0 to override.\n", + gen, param, param); + + return true; + } + + if (radeon_support_built) + dev_info(dev, "%s support provided by radeon.\n" + "Use radeon.%s=0 amdgpu.%s=1 to override.\n", + gen, param, param); + else if (module_param == 0) + dev_info(dev, "%s support disabled by module param\n", gen); + + return false; +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2353,48 +2414,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENOTSUPP; } - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: -#ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - dev_info(&pdev->dev, - "SI support provided by radeon.\n"); - dev_info(&pdev->dev, - "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" - ); - return -ENODEV; - } - break; -#else - dev_info(&pdev->dev, "amdgpu is built without SI support.\n"); + if (!amdgpu_support_enabled(&pdev->dev, flags & AMD_ASIC_MASK)) return -ENODEV; -#endif - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: -#ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - dev_info(&pdev->dev, - "CIK support provided by radeon.\n"); - dev_info(&pdev->dev, - "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" - ); - return -ENODEV; - } - 
break; -#else - dev_info(&pdev->dev, "amdgpu is built without CIK support.\n"); - return -ENODEV; -#endif - default: - break; - } adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) From b84bc92607144f04c5d140177fe45faec0ec9b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sun, 9 Nov 2025 16:41:06 +0100 Subject: [PATCH 57/67] drm/amdgpu: Use amdgpu by default on CIK dedicated GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The amdgpu driver has been working well on CIK dGPUs for years. Now that the DC analog connector support landed, these GPUs are at feature parity with the old radeon driver. Additionally, amdgpu yields extra performance, supports Vulkan and provides more display features through DC as well as more robust power management. Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 +++++- drivers/gpu/drm/radeon/radeon_drv.c | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index d0bd1a5a3c01..09d3b080a4da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -641,7 +641,9 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); * CIK (Sea Islands) are second generation GCN GPUs, supported by both * drivers: radeon (old) and amdgpu (new). This parameter controls whether * amdgpu should support CIK. - * By default, CIK chips are supported by radeon (except when radeon is not built). + * By default: + * - CIK dedicated GPUs are supported by amdgpu. + * - CIK APUs are supported by radeon (except when radeon is not built). * Only relevant when CONFIG_DRM_AMDGPU_CIK is enabled to build CIK support in amdgpu. * See also radeon.cik_support which should be disabled when amdgpu.cik_support is * enabled, and vice versa. 
@@ -2328,6 +2330,8 @@ static bool amdgpu_support_enabled(struct device *dev, case CHIP_BONAIRE: case CHIP_HAWAII: + support_by_default = true; + fallthrough; case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 1dfe5482d48e..440145fed625 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -277,6 +277,8 @@ static bool radeon_support_enabled(struct device *dev, case CHIP_BONAIRE: case CHIP_HAWAII: + support_by_default = false; + fallthrough; case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: From f1a2cd427d0401c1888c04613fb18d272825a5c2 Mon Sep 17 00:00:00 2001 From: Will Aitken Date: Tue, 30 Sep 2025 16:24:07 +0000 Subject: [PATCH 58/67] drm/amdgpu: Refactor sriov xgmi topology filling to common code amdgpu_xgmi_fill_topology_info and psp_xgmi_reflect_topology_info perform the same logic of copying topology info of one node to every other node in the hive. Instead of having two functions that purport to do the same thing, this refactoring moves the logic of the fill function to the reflect function and adds reflecting port number info as well for complete functionality. 
Signed-off-by: Will Aitken Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 19 ++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 27 ------------------------ 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index aa7987d0806c..76ab8462d9ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1539,6 +1539,7 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp, uint64_t src_node_id = psp->adev->gmc.xgmi.node_id; uint64_t dst_node_id = node_info.node_id; uint8_t dst_num_hops = node_info.num_hops; + uint8_t dst_is_sharing_enabled = node_info.is_sharing_enabled; uint8_t dst_num_links = node_info.num_links; hive = amdgpu_get_xgmi_hive(psp->adev); @@ -1558,13 +1559,20 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp, continue; mirror_top_info->nodes[j].num_hops = dst_num_hops; - /* - * prevent 0 num_links value re-reflection since reflection + mirror_top_info->nodes[j].is_sharing_enabled = dst_is_sharing_enabled; + /* prevent 0 num_links value re-reflection since reflection * criteria is based on num_hops (direct or indirect). 
- * */ - if (dst_num_links) + if (dst_num_links) { mirror_top_info->nodes[j].num_links = dst_num_links; + /* swap src and dst due to frame of reference */ + for (int k = 0; k < dst_num_links; k++) { + mirror_top_info->nodes[j].port_num[k].src_xgmi_port_num = + node_info.port_num[k].dst_xgmi_port_num; + mirror_top_info->nodes[j].port_num[k].dst_xgmi_port_num = + node_info.port_num[k].src_xgmi_port_num; + } + } break; } @@ -1639,7 +1647,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == - IP_VERSION(13, 0, 14); + IP_VERSION(13, 0, 14) || + amdgpu_sriov_vf(psp->adev); bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 : psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 10e42b0ff7a2..aad530c46a9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -968,28 +968,6 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf return 0; } -static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev) -{ - struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info; - struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info; - - for (int i = 0; i < peer_info->num_nodes; i++) { - if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) { - for (int j = 0; j < top_info->num_nodes; j++) { - if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) { - peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops; - peer_info->nodes[i].is_sharing_enabled = - top_info->nodes[j].is_sharing_enabled; - peer_info->nodes[i].num_links = - top_info->nodes[j].num_links; - return; - } - } - } - } -} - int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { struct 
psp_xgmi_topology_info *top_info; @@ -1075,11 +1053,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) /* To do: continue with some node failed or disable the whole hive*/ goto exit_unlock; } - - /* fill the topology info for peers instead of getting from PSP */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - amdgpu_xgmi_fill_topology_info(adev, tmp_adev); - } } else { /* get latest topology info for each device from psp */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { From 4d0e55965c9c2f085a83f6a69acd7b5a44132de5 Mon Sep 17 00:00:00 2001 From: Will Aitken Date: Tue, 7 Oct 2025 14:19:45 +0000 Subject: [PATCH 59/67] drm/amdgpu: Update headers for sriov xgmi ext peer link support feature flag Adds new sriov msg flag to match host, feature flag in the amdgim enum, and a wrapper macro to check it. Signed-off-by: Will Aitken Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 14d864be5800..4fd194a9a972 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -150,6 +150,7 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_RAS_CAPS = (1 << 9), AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10), AMDGIM_FEATURE_RAS_CPER = (1 << 11), + AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK = (1 << 12), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -395,6 +396,9 @@ struct amdgpu_video_codec_info; #define amdgpu_sriov_ras_cper_en(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER) +#define amdgpu_sriov_xgmi_ta_ext_peer_link_en(adev) \ +((adev)->virt.gim_feature & AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK) + static inline bool is_virtual_machine(void) { #if defined(CONFIG_X86) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 1cee083fb6bd..ba23bf982d7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -160,7 +160,8 @@ union amd_sriov_msg_feature_flags { uint32_t ras_caps : 1; uint32_t ras_telemetry : 1; uint32_t ras_cper : 1; - uint32_t reserved : 20; + uint32_t xgmi_ta_ext_peer_link : 1; + uint32_t reserved : 19; } flags; uint32_t all; }; From f8bdb559c01e8291f1862ee9ab4a7eb13d6d62ac Mon Sep 17 00:00:00 2001 From: Will Aitken Date: Tue, 7 Oct 2025 14:49:15 +0000 Subject: [PATCH 60/67] drm/amdgpu: Enable xgmi extended peer links for sriov guest The amd-smi tool relies on extended peer link information to report xgmi link metrics. The necessary xgmi ta command, GET_EXTEND_PEER_LINKS, has been enabled in the host driver and this change is necessary for the guest to make use of it. To handle the case where the host driver does not have the latest xgmi ta, the guest driver checks for guest support through a pf2vf feature flag before invoking psp. Signed-off-by: Will Aitken Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 76ab8462d9ed..0b10497d487c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1649,8 +1649,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14) || amdgpu_sriov_vf(psp->adev); - bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 
0 : - psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG; + bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG || + amdgpu_sriov_xgmi_ta_ext_peer_link_en(psp->adev); /* popluate the shared output buffer rather than the cmd input buffer * with node_ids as the input for GET_PEER_LINKS command execution. From 20459c098d688971237e56e30263ddce467c8c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 29 Oct 2025 15:36:32 +0100 Subject: [PATCH 61/67] drm/amdgpu: avoid memory allocation in the critical code path v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we run out of VMIDs we need to wait for some to become available. Previously we were using a dma_fence_array for that, but this means that we have to allocate memory. Instead just wait for the first not signaled fence from the least recently used VMID to signal. That is not as efficient since we end up in this function multiple times again, but allocating memory can easily fail or deadlock if we have to wait for memory to become available. 
v2: remove now unused VM manager fields v3: fix dma_fence reference Signed-off-by: Christian König Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4258 Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 52 +++++++------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 -- 3 files changed, 14 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3ef5bc95642c..b2af2cc6826c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -201,58 +201,34 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct dma_fence **fences; - unsigned i; + /* If anybody is waiting for a VMID let everybody wait for fairness */ if (!dma_fence_is_signaled(ring->vmid_wait)) { *fence = dma_fence_get(ring->vmid_wait); return 0; } - fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT); - if (!fences) - return -ENOMEM; - /* Check if we have an idle VMID */ - i = 0; - list_for_each_entry((*idle), &id_mgr->ids_lru, list) { + list_for_each_entry_reverse((*idle), &id_mgr->ids_lru, list) { /* Don't use per engine and per process VMID at the same time */ struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ? 
NULL : ring; - fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r); - if (!fences[i]) - break; - ++i; + *fence = amdgpu_sync_peek_fence(&(*idle)->active, r); + if (!(*fence)) + return 0; } - /* If we can't find a idle VMID to use, wait till one becomes available */ - if (&(*idle)->list == &id_mgr->ids_lru) { - u64 fence_context = adev->vm_manager.fence_context + ring->idx; - unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; - struct dma_fence_array *array; - unsigned j; - - *idle = NULL; - for (j = 0; j < i; ++j) - dma_fence_get(fences[j]); - - array = dma_fence_array_create(i, fences, fence_context, - seqno, true); - if (!array) { - for (j = 0; j < i; ++j) - dma_fence_put(fences[j]); - kfree(fences); - return -ENOMEM; - } - - *fence = dma_fence_get(&array->base); - dma_fence_put(ring->vmid_wait); - ring->vmid_wait = &array->base; - return 0; - } - kfree(fences); + /* + * If we can't find a idle VMID to use, wait on a fence from the least + * recently used in the hope that it will be available soon. + */ + *idle = NULL; + dma_fence_put(ring->vmid_wait); + ring->vmid_wait = dma_fence_get(*fence); + /* This is the reference we return */ + dma_fence_get(*fence); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 700b4a776532..7fc081e88b6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2843,8 +2843,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) */ void amdgpu_vm_manager_init(struct amdgpu_device *adev) { - unsigned i; - /* Concurrent flushes are only possible starting with Vega10 and * are broken on Navi10 and Navi14. 
*/ @@ -2853,11 +2851,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->asic_type == CHIP_NAVI14); amdgpu_vmid_mgr_init(adev); - adev->vm_manager.fence_context = - dma_fence_context_alloc(AMDGPU_MAX_RINGS); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - adev->vm_manager.seqno[i] = 0; - spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index cf0ec94e8a07..15d757c016cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -453,10 +453,6 @@ struct amdgpu_vm_manager { unsigned int first_kfd_vmid; bool concurrent_flush; - /* Handling of VM fences */ - u64 fence_context; - unsigned seqno[AMDGPU_MAX_RINGS]; - uint64_t max_pfn; uint32_t num_level; uint32_t block_size; From 991a4343b47acd34ba3541be76a5b3ff6fa71f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 28 Oct 2025 11:16:12 +0100 Subject: [PATCH 62/67] drm/amdgpu: use GFP_ATOMIC instead of NOWAIT in the critical path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise job submissions can fail with ENOMEM. We probably need to re-design the per VMID tracking at some point. Signed-off-by: Christian König Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4258 Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b2af2cc6826c..9cab36322c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -289,7 +289,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, * user of the VMID. 
*/ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, - GFP_NOWAIT); + GFP_ATOMIC); if (r) return r; @@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, */ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, - GFP_NOWAIT); + GFP_ATOMIC); if (r) return r; @@ -402,7 +402,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* Remember this submission as user of the VMID */ r = amdgpu_sync_fence(&id->active, &job->base.s_fence->finished, - GFP_NOWAIT); + GFP_ATOMIC); if (r) goto error; From c0e870407bcaf4c1e074aedb4df3a43eb3f05303 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 11 Nov 2025 16:56:35 +0800 Subject: [PATCH 63/67] drm/amdgpu: Synchronize sriov host to add block_mmsch bit field Synchronize sriov host to add block_mmsch bit field. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index ba23bf982d7b..3cdb1e0eca37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -199,8 +199,9 @@ union amd_sriov_ras_caps { uint64_t block_jpeg : 1; uint64_t block_ih : 1; uint64_t block_mpio : 1; + uint64_t block_mmsch : 1; uint64_t poison_propogation_mode : 1; - uint64_t reserved : 44; + uint64_t reserved : 43; } bits; uint64_t all; }; From 2b198d459f0983313fc090b1527c68569f406575 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 21 Jul 2025 15:22:27 +0800 Subject: [PATCH 64/67] drm/amdgpu: Add lock to serialize sriov command execution Add lock to serialize sriov command execution. 
Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++ drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 17 ++++++++++++----- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f2ce8f506aa8..47a6ce4fdc74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -917,6 +917,7 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev) RATELIMIT_MSG_ON_RELEASE); mutex_init(&adev->virt.ras.ras_telemetry_mutex); + mutex_init(&adev->virt.access_req_mutex); adev->virt.ras.cper_rptr = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 4fd194a9a972..01d5bca2dee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -325,6 +325,8 @@ struct amdgpu_virt { /* Spinlock to protect access to the RLCG register interface */ spinlock_t rlcg_reg_lock; + struct mutex access_req_mutex; + union amd_sriov_ras_caps ras_en_caps; union amd_sriov_ras_caps ras_telemetry_en_caps; struct amdgpu_virt_ras ras; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index cd5b2f07edb8..e7cd07383d56 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -173,13 +173,17 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, enum idh_request req, u32 data1, u32 data2, u32 data3) { - int r, retry = 1; + struct amdgpu_virt *virt = &adev->virt; + int r = 0, retry = 1; enum idh_event event = -1; + mutex_lock(&virt->access_req_mutex); send_request: - if (amdgpu_ras_is_rma(adev)) - return -ENODEV; + if (amdgpu_ras_is_rma(adev)) { + r = -ENODEV; + goto out; + } 
xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); @@ -217,7 +221,7 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, if (req != IDH_REQ_GPU_INIT_DATA) { dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r); - return r; + goto out; } else /* host doesn't support REQ_GPU_INIT_DATA handshake */ adev->virt.req_init_data_ver = 0; } else { @@ -246,7 +250,10 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, } } - return 0; +out: + mutex_unlock(&virt->access_req_mutex); + + return r; } static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, From 6a37539973f8a38e7e165a6bcdeb107edf7c490a Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Thu, 23 Oct 2025 14:47:07 +0800 Subject: [PATCH 65/67] drm/amdgpu: Fix the issue of missing ras message on sriov host This code only applies to amdgpu processing poison consumption after uniras is enabled, but not to sriov. Signed-off-by: YiPeng Chai Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 --------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 644f79f3c9af..a2879d2b7c8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,7 +36,6 @@ #include "amdgpu_ras.h" #include "amdgpu_umc.h" #include "amdgpu_reset.h" -#include "amdgpu_ras_mgr.h" /* Total memory size in system memory and all GPU VRAM. 
Used to * estimate worst case amount of memory to reserve for page tables @@ -747,20 +746,6 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) { - - if (amdgpu_uniras_enabled(adev)) { - struct ras_ih_info ih_info; - - memset(&ih_info, 0, sizeof(ih_info)); - ih_info.block = block; - ih_info.pasid = pasid; - ih_info.reset = reset; - ih_info.pasid_fn = pasid_fn; - ih_info.data = data; - amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info); - return; - } - amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 3eb252de343b..3f0b0e9af4f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -24,6 +24,7 @@ #include #include "amdgpu.h" #include "umc_v6_7.h" +#include "amdgpu_ras_mgr.h" #define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms #define MAX_UMC_HASH_STRING_SIZE 256 @@ -273,6 +274,15 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, } amdgpu_ras_error_data_fini(&err_data); + } else if (amdgpu_uniras_enabled(adev)) { + struct ras_ih_info ih_info = {0}; + + ih_info.block = block; + ih_info.pasid = pasid; + ih_info.reset = reset; + ih_info.pasid_fn = pasid_fn; + ih_info.data = data; + amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info); } else { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int ret; From c034426671d0aa3eed9a7cc924bdd2ae3b76fd5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 14 Nov 2025 09:26:08 -0500 Subject: [PATCH 66/67] drm/amdgpu: Use amdgpu by default on CIK dedicated GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The amdgpu driver has been working well on CIK dGPUs for years. 
Now that the DC analog connector support landed, amdgpu is at feature parity with the old radeon driver on CIK dGPUs. Enabling the amdgpu driver by default for CIK dGPUs has the following benefits: - More stable OpenGL support through RadeonSI - Vulkan support through RADV - Improved performance - Better display features through DC Users who want to keep using the old driver can do so using: amdgpu.cik_support=0 radeon.cik_support=1 v2: - Update documentation in Kconfig file v3: - Rebase documentation updates (Alex) Reviewed-by: Christian König Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 1acfed2f92ef..883f32428871 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -59,11 +59,17 @@ config DRM_AMDGPU_CIK Choose this option if you want to enable support for CIK (Sea Islands) asics. - CIK is already supported in radeon. Support for CIK in amdgpu - will be disabled by default and is still provided by radeon. - Use module options to override this: + CIK (Sea Islands) are second generation GCN GPUs, + supported by both drivers: radeon (old) and amdgpu (new). 
+ By default, + CIK dedicated GPUs are supported by amdgpu + CIK APUs are supported by radeon + Use module options to override this: + To use amdgpu for CIK, radeon.cik_support=0 amdgpu.cik_support=1 + To use radeon for CIK, + radeon.cik_support=1 amdgpu.cik_support=0 config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" From ccd3b4c7c37fbbd3e5244d3c54ca24ae0a37810d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 14 Nov 2025 13:07:36 +0100 Subject: [PATCH 67/67] drm/amdgpu: Use amdgpu by default on SI dedicated GPUs (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the DC analog connector support and VCE1 support landed, amdgpu is at feature parity with the old radeon driver on SI dGPUs. Enabling the amdgpu driver by default for SI dGPUs has the following benefits: - More stable OpenGL support through RadeonSI - Vulkan support through RADV - Improved performance - Better display features through DC Users who want to keep using the old driver can do so using: amdgpu.si_support=0 radeon.si_support=1 v2: - Update documentation in Kconfig file Reviewed-by: Christian König Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 12 +++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/radeon/radeon_drv.c | 1 + 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 883f32428871..7f515be5185d 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -43,14 +43,16 @@ config DRM_AMDGPU_SI bool "Enable amdgpu support for SI parts" depends on DRM_AMDGPU help - Choose this option if you want to enable experimental support + Choose this option if you want to enable support for SI (Southern Islands) asics. - SI is already supported in radeon. 
Experimental support for SI - in amdgpu will be disabled by default and is still provided by - radeon. Use module options to override this: + SI (Southern Islands) are first generation GCN GPUs, + supported by both drivers: radeon (old) and amdgpu (new). + By default, SI dedicated GPUs are supported by amdgpu. - radeon.si_support=0 amdgpu.si_support=1 + Use module options to override this: + To use radeon for SI, + radeon.si_support=1 amdgpu.si_support=0 config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 09d3b080a4da..2dfbddcef9ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -623,7 +623,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); * SI (Southern Islands) are first generation GCN GPUs, supported by both * drivers: radeon (old) and amdgpu (new). This parameter controls whether * amdgpu should support SI. - * By default, SI chips are supported by radeon (except when radeon is not built). + * By default, SI dedicated GPUs are supported by amdgpu. * Only relevant when CONFIG_DRM_AMDGPU_SI is enabled to build SI support in amdgpu. * See also radeon.si_support which should be disabled when amdgpu.si_support is * enabled, and vice versa. 
@@ -2326,6 +2326,7 @@ static bool amdgpu_support_enabled(struct device *dev, param = "si_support"; module_param = amdgpu_si_support; amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_SI); + support_by_default = true; break; case CHIP_BONAIRE: diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 440145fed625..87fd6255c114 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -273,6 +273,7 @@ static bool radeon_support_enabled(struct device *dev, gen = "SI"; module_param = radeon_si_support; amdgpu_support_built &= IS_ENABLED(CONFIG_DRM_AMDGPU_SI); + support_by_default = false; break; case CHIP_BONAIRE: