From 919bfa9b2dbf3bc0c478afd4e44445836381dacb Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Tue, 10 Dec 2024 03:25:57 +0000 Subject: [PATCH 01/21] cpufreq/amd-pstate: Detect preferred core support before driver registration Booting with amd-pstate on 3rd Generation EPYC system incorrectly enabled ITMT support despite the system not supporting Preferred Core ranking. amd_pstate_init_prefcore() called during amd_pstate*_cpu_init() requires "amd_pstate_prefcore" to be set correctly however the preferred core support is detected only after driver registration which is too late. Swap the function calls around to detect preferred core support before registring the driver via amd_pstate_register_driver(). This ensures amd_pstate*_cpu_init() sees the correct value of "amd_pstate_prefcore" considering the platform support. Fixes: 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") Fixes: ff2653ded4d9 ("cpufreq/amd-pstate: Move registration after static function call update") Signed-off-by: K Prateek Nayak Acked-by: Mario Limonciello Link: https://lore.kernel.org/r/20241210032557.754-1-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d7630bab2516..8b36450bbdf6 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1869,18 +1869,18 @@ static int __init amd_pstate_init(void) static_call_update(amd_pstate_update_perf, shmem_update_perf); } - ret = amd_pstate_register_driver(cppc_state); - if (ret) { - pr_err("failed to register with return %d\n", ret); - return ret; - } - if (amd_pstate_prefcore) { ret = amd_detect_prefcore(&amd_pstate_prefcore); if (ret) return ret; } + ret = amd_pstate_register_driver(cppc_state); + if (ret) { + pr_err("failed to register with return %d\n", ret); + return ret; + } + dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group); From 50a062a7620051c09adacd6d140ebd56881a333b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:34 -0600 Subject: [PATCH 02/21] cpufreq/amd-pstate: Store the boost numerator as highest perf again commit ad4caad58d91d ("cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into amd_get_boost_ratio_numerator()") changed the semantics for highest perf and commit 18d9b52271213 ("cpufreq/amd-pstate: Use nominal perf for limits when boost is disabled") worked around those semantic changes. This however is a confusing result and furthermore makes it awkward to change frequency limits and boost due to the scaling differences. Restore the boost numerator to highest perf again. Suggested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Fixes: ad4caad58d91 ("cpufreq: amd-pstate: Merge amd_pstate_highest_perf_set() into amd_get_boost_ratio_numerator()") Link: https://lore.kernel.org/r/20241209185248.16301-2-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- Documentation/admin-guide/pm/amd-pstate.rst | 4 +--- drivers/cpufreq/amd-pstate.c | 25 ++++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 210a808b74ec..412423c54f25 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability `_). In some ASICs, the highest CPPC performance is not the one in the ``_CPC`` table, so we need to expose it to sysfs. If boost is not active, but still supported, this maximum frequency will be larger than the one in -``cpuinfo``. On systems that support preferred core, the driver will have -different values for some cores than others and this will reflect the values -advertised by the platform at bootup. +``cpuinfo``. This attribute is read-only. ``amd_pstate_lowest_nonlinear_freq`` diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8b36450bbdf6..ab6fe9c2150c 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -374,15 +374,19 @@ static inline int amd_pstate_cppc_enable(bool enable) static int msr_init_perf(struct amd_cpudata *cpudata) { - u64 cap1; + u64 cap1, numerator; int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); - WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; + + WRITE_ONCE(cpudata->highest_perf, numerator); + WRITE_ONCE(cpudata->max_limit_perf, numerator); WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); @@ -394,13 +398,18 @@ static int msr_init_perf(struct amd_cpudata *cpudata) static int shmem_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; + u64 numerator; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf); - WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf); + ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); + if (ret) + return ret; + + WRITE_ONCE(cpudata->highest_perf, numerator); + WRITE_ONCE(cpudata->max_limit_perf, numerator); WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); @@ -889,7 +898,6 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { int ret; u32 min_freq, max_freq; - u64 numerator; u32 nominal_perf, nominal_freq; u32 lowest_nonlinear_perf, lowest_nonlinear_freq; u32 boost_ratio, lowest_nonlinear_ratio; @@ -911,10 +919,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) nominal_perf = READ_ONCE(cpudata->nominal_perf); - ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator); - if (ret) - return ret; - boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf); + boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); From 2993b29b2a98f2bc9d55dfd37ef39f56a2908748 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:35 -0600 Subject: [PATCH 03/21] cpufreq/amd-pstate: Use boost numerator for upper bound of frequencies commit 18d9b5227121 ("cpufreq/amd-pstate: Use nominal perf for limits when boost is disabled") introduced different semantics for min/max limits based upon whether the user turned off boost from sysfs. This however is not necessary when the highest perf value is the boost numerator. Suggested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Fixes: 18d9b5227121 ("cpufreq/amd-pstate: Use nominal perf for limits when boost is disabled") Link: https://lore.kernel.org/r/20241209185248.16301-3-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ab6fe9c2150c..66e5dfc711c0 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -570,16 +570,13 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf; + u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; struct amd_cpudata *cpudata = policy->driver_data; - if (cpudata->boost_supported && !policy->boost_enabled) - max_perf = READ_ONCE(cpudata->nominal_perf); - else - max_perf = READ_ONCE(cpudata->highest_perf); - - max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq); - min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq); + max_perf = READ_ONCE(cpudata->highest_perf); + max_freq = READ_ONCE(cpudata->max_freq); + max_limit_perf = div_u64(policy->max * max_perf, max_freq); + min_limit_perf = div_u64(policy->min * max_perf, max_freq); lowest_perf = READ_ONCE(cpudata->lowest_perf); if (min_limit_perf < lowest_perf) From 16c977f8177f9c2ecb88319c944722107c952731 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:38 +0000 Subject: [PATCH 04/21] cpufreq/amd-pstate: Convert the amd_pstate_get/set_epp() to static calls MSR and shared memory based systems have different mechanisms to get and set the epp value. Split those mechanisms into different functions and assign them appropriately to the static calls at boot time. This eliminates the need for the "if(cpu_feature_enabled(X86_FEATURE_CPPC))" checks at runtime. Also, propagate the error code from rdmsrl_on_cpu() and cppc_get_epp_perf() to *_get_epp()'s caller, instead of returning -EIO unconditionally. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-2-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 94 +++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 66e5dfc711c0..bc42d96984f4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -180,26 +180,40 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) { u64 epp; int ret; - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - if (!cppc_req_cached) { - epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - &cppc_req_cached); - if (epp) - return epp; - } - epp = (cppc_req_cached >> 24) & 0xFF; - } else { - ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (!cppc_req_cached) { + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); if (ret < 0) { pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return -EIO; + return ret; } } + epp = (cppc_req_cached >> 24) & 0xFF; + + return (s16)epp; +} + +DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); + +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +{ + return static_call(amd_pstate_get_epp)(cpudata, cppc_req_cached); +} + +static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) +{ + u64 epp; + int ret; + + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; + } return (s16)(epp & 0xff); } @@ -253,33 +267,45 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, max_perf, fast_switch); } -static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + int ret; + + u64 value = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (!ret) + cpudata->epp_cached = epp; + + return ret; +} + +DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); + +static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + return static_call(amd_pstate_set_epp)(cpudata, epp); +} + +static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) { int ret; struct cppc_perf_ctrls perf_ctrls; - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, false); - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; - WRITE_ONCE(cpudata->cppc_req_cached, value); - - ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; - } else { - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); - - perf_ctrls.energy_perf = epp; - ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - if (ret) { - pr_debug("failed to set energy perf value (%d)\n", ret); - return ret; - } - cpudata->epp_cached = epp; + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf value (%d)\n", ret); + return ret; } + cpudata->epp_cached = epp; return ret; } @@ -1869,6 +1895,8 @@ static int __init amd_pstate_init(void) static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable); static_call_update(amd_pstate_init_perf, shmem_init_perf); static_call_update(amd_pstate_update_perf, shmem_update_perf); + static_call_update(amd_pstate_get_epp, shmem_get_epp); + static_call_update(amd_pstate_set_epp, shmem_set_epp); } if (amd_pstate_prefcore) { From 57a2b25e45cd40eaa2e505452384fa1b7248895a Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:39 +0000 Subject: [PATCH 05/21] cpufreq/amd-pstate: Move the invocation of amd_pstate_update_perf() amd_pstate_update_perf() should not be a part of shmem_set_epp() function, so move it to the amd_pstate_epp_update_limit() function, where it is needed. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-3-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bc42d96984f4..bd3e0f113a88 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -296,9 +296,6 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); - perf_ctrls.energy_perf = epp; ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); if (ret) { @@ -1600,6 +1597,10 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) epp = 0; WRITE_ONCE(cpudata->cppc_req_cached, value); + + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, false); + return amd_pstate_set_epp(cpudata, epp); } From b1089e0c8817fda93d474eaa82ad86386887aefe Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:40 +0000 Subject: [PATCH 06/21] cpufreq/amd-pstate: Refactor amd_pstate_epp_reenable() and amd_pstate_epp_offline() Replace similar code chunks with amd_pstate_update_perf() and amd_pstate_set_epp() function calls. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-4-Dhananjay.Ugwekar@amd.com [ML: Fix LKP reported error about unused variable] Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bd3e0f113a88..23c5840dc406 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1632,25 +1632,17 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) { - struct cppc_perf_ctrls perf_ctrls; - u64 value, max_perf; + u64 max_perf; int ret; ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd pstate during resume, return %d\n", ret); - value = READ_ONCE(cpudata->cppc_req_cached); max_perf = READ_ONCE(cpudata->highest_perf); - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.max_perf = max_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached); - cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - } + amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); + amd_pstate_set_epp(cpudata, cpudata->epp_cached); } static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) @@ -1670,31 +1662,15 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) static void amd_pstate_epp_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - struct cppc_perf_ctrls perf_ctrls; int min_perf; - u64 value; min_perf = READ_ONCE(cpudata->lowest_perf); - value = READ_ONCE(cpudata->cppc_req_cached); mutex_lock(&amd_pstate_limits_lock); - if (cpu_feature_enabled(X86_FEATURE_CPPC)) { - cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN; - /* Set max perf same as min perf */ - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(min_perf); - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - } else { - perf_ctrls.desired_perf = 0; - perf_ctrls.min_perf = min_perf; - perf_ctrls.max_perf = min_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); - perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE); - cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); - } + amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); + amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); + mutex_unlock(&amd_pstate_limits_lock); } From b78f8c87ec3e7499bb049986838636d3afbc7ece Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:41 +0000 Subject: [PATCH 07/21] cpufreq/amd-pstate: Remove the cppc_state check in offline/online functions Only amd_pstate_epp driver (i.e. cppc_state = ACTIVE) enters the amd_pstate_epp_offline() and amd_pstate_epp_cpu_online() functions, so remove the unnecessary if condition checking if cppc_state is equal to AMD_PSTATE_ACTIVE. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241204144842.164178-5-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 23c5840dc406..9b8d7f299fca 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1651,10 +1651,8 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - if (cppc_state == AMD_PSTATE_ACTIVE) { - amd_pstate_epp_reenable(cpudata); - cpudata->suspended = false; - } + amd_pstate_epp_reenable(cpudata); + cpudata->suspended = false; return 0; } @@ -1683,8 +1681,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) if (cpudata->suspended) return 0; - if (cppc_state == AMD_PSTATE_ACTIVE) - amd_pstate_epp_offline(policy); + amd_pstate_epp_offline(policy); return 0; } From 53ec2101dfede8fecdd240662281a12e537c3411 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 4 Dec 2024 14:48:42 +0000 Subject: [PATCH 08/21] cpufreq/amd-pstate: Merge amd_pstate_epp_cpu_offline() and amd_pstate_epp_offline() amd_pstate_epp_offline() is only called from within amd_pstate_epp_cpu_offline() and doesn't make much sense to have it at all. Hence, remove it. Also remove the unncessary debug print in the offline path while at it. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241204144842.164178-6-Dhananjay.Ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9b8d7f299fca..8ce754ead328 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1657,11 +1657,14 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) return 0; } -static void amd_pstate_epp_offline(struct cpufreq_policy *policy) +static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; int min_perf; + if (cpudata->suspended) + return 0; + min_perf = READ_ONCE(cpudata->lowest_perf); mutex_lock(&amd_pstate_limits_lock); @@ -1670,18 +1673,6 @@ static void amd_pstate_epp_offline(struct cpufreq_policy *policy) amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); mutex_unlock(&amd_pstate_limits_lock); -} - -static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) -{ - struct amd_cpudata *cpudata = policy->driver_data; - - pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu); - - if (cpudata->suspended) - return 0; - - amd_pstate_epp_offline(policy); return 0; } From 4dcd130151a654108a414b298df9e21a0d3575c9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:36 -0600 Subject: [PATCH 09/21] cpufreq/amd-pstate: Add trace event for EPP perf updates In "active" mode the most important thing for debugging whether an issue is hardware or software based is to look at what was the last thing written to the CPPC request MSR or shared memory region. The 'amd_pstate_epp_perf' trace event shows the values being written for all CPUs. Reviewed-by: Perry Yuan Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-4-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 45 ++++++++++++++++++++++++++++++ drivers/cpufreq/amd-pstate.c | 28 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index 35f38ae67fb1..e2221a4b6901 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -88,6 +88,51 @@ TRACE_EVENT(amd_pstate_perf, ) ); +TRACE_EVENT(amd_pstate_epp_perf, + + TP_PROTO(unsigned int cpu_id, + unsigned int highest_perf, + unsigned int epp, + unsigned int min_perf, + unsigned int max_perf, + bool boost + ), + + TP_ARGS(cpu_id, + highest_perf, + epp, + min_perf, + max_perf, + boost), + + TP_STRUCT__entry( + __field(unsigned int, cpu_id) + __field(unsigned int, highest_perf) + __field(unsigned int, epp) + __field(unsigned int, min_perf) + __field(unsigned int, max_perf) + __field(bool, boost) + ), + + TP_fast_assign( + __entry->cpu_id = cpu_id; + __entry->highest_perf = highest_perf; + __entry->epp = epp; + __entry->min_perf = min_perf; + __entry->max_perf = max_perf; + __entry->boost = boost; + ), + + TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u", + (unsigned int)__entry->cpu_id, + (unsigned int)__entry->min_perf, + (unsigned int)__entry->max_perf, + (unsigned int)__entry->highest_perf, + (unsigned int)__entry->epp, + (bool)__entry->boost + ) +); + #endif /* _AMD_PSTATE_TRACE_H */ /* This part must be outside protection */ diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8ce754ead328..20d52bce1882 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -324,6 +324,14 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, return -EBUSY; } + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + epp, + AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + AMD_CPPC_MAX_PERF(cpudata->cppc_req_cached), + cpudata->boost_state); + } + ret = amd_pstate_set_epp(cpudata, epp); return ret; @@ -1598,6 +1606,13 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) WRITE_ONCE(cpudata->cppc_req_cached, value); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, + cpudata->min_limit_perf, + cpudata->max_limit_perf, + policy->boost_enabled); + } + amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, cpudata->max_limit_perf, false); @@ -1641,6 +1656,13 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) max_perf = READ_ONCE(cpudata->highest_perf); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + cpudata->epp_cached, + AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + max_perf, cpudata->boost_state); + } + amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); amd_pstate_set_epp(cpudata, cpudata->epp_cached); } @@ -1669,6 +1691,12 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) mutex_lock(&amd_pstate_limits_lock); + if (trace_amd_pstate_epp_perf_enabled()) { + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + AMD_CPPC_EPP_BALANCE_POWERSAVE, + min_perf, min_perf, policy->boost_enabled); + } + amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); From 6c093d5a5b73ec1caf1e706510ae6031af2f9d43 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:37 -0600 Subject: [PATCH 10/21] cpufreq/amd-pstate: convert mutex use to guard() Using scoped guard declaration will unlock mutexes automatically. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-5-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 20d52bce1882..bcb9367aa9ca 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -758,12 +758,12 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) pr_err("Boost mode is not supported by this processor or SBIOS\n"); return -EOPNOTSUPP; } - mutex_lock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); + ret = amd_pstate_cpu_boost_update(policy, state); WRITE_ONCE(cpudata->boost_state, !ret ? state : false); policy->boost_enabled = !ret ? state : false; refresh_frequency_limits(policy); - mutex_unlock(&amd_pstate_driver_lock); return ret; } @@ -854,7 +854,8 @@ static void amd_pstate_update_limits(unsigned int cpu) if (!amd_pstate_prefcore) return; - mutex_lock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); + ret = amd_get_highest_perf(cpu, &cur_high); if (ret) goto free_cpufreq_put; @@ -874,7 +875,6 @@ static void amd_pstate_update_limits(unsigned int cpu) if (!highest_perf_changed) cpufreq_update_policy(cpu); - mutex_unlock(&amd_pstate_driver_lock); } /* @@ -1203,11 +1203,11 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - mutex_lock(&amd_pstate_limits_lock); - ret = amd_pstate_set_energy_pref_index(cpudata, ret); - mutex_unlock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); - return ret ?: count; + ret = amd_pstate_set_energy_pref_index(cpudata, ret); + + return ret ? ret : count; } static ssize_t show_energy_performance_preference( @@ -1371,13 +1371,10 @@ EXPORT_SYMBOL_GPL(amd_pstate_update_status); static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { - ssize_t ret; - mutex_lock(&amd_pstate_driver_lock); - ret = amd_pstate_show_status(buf); - mutex_unlock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); - return ret; + return amd_pstate_show_status(buf); } static ssize_t status_store(struct device *a, struct device_attribute *b, @@ -1386,9 +1383,8 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, char *p = memchr(buf, '\n', count); int ret; - mutex_lock(&amd_pstate_driver_lock); + guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_update_status(buf, p ? p - buf : count); - mutex_unlock(&amd_pstate_driver_lock); return ret < 0 ? ret : count; } @@ -1689,7 +1685,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) min_perf = READ_ONCE(cpudata->lowest_perf); - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, @@ -1700,8 +1696,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); - mutex_unlock(&amd_pstate_limits_lock); - return 0; } @@ -1730,13 +1724,11 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { - mutex_lock(&amd_pstate_limits_lock); + guard(mutex)(&amd_pstate_limits_lock); /* enable amd pstate from suspend state*/ amd_pstate_epp_reenable(cpudata); - mutex_unlock(&amd_pstate_limits_lock); - cpudata->suspended = false; } From 3b43739824a6b617d8213dd2bce6fb1b2747c377 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:38 -0600 Subject: [PATCH 11/21] cpufreq/amd-pstate: Drop cached epp_policy variable epp_policy is not used by any of the current code and there is no need to cache it. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-6-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 3 --- drivers/cpufreq/amd-pstate.h | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bcb9367aa9ca..0e77584dfade 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1478,7 +1478,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; cpudata->cpu = policy->cpu; - cpudata->epp_policy = 0; ret = amd_pstate_init_perf(cpudata); if (ret) @@ -1585,8 +1584,6 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(0); - cpudata->epp_policy = cpudata->policy; - /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); if (epp < 0) { diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index cd573bc6b6db..7765c82f975c 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -57,7 +57,6 @@ struct amd_aperf_mperf { * @hw_prefcore: check whether HW supports preferred core featue. * Only when hw_prefcore and early prefcore param are true, * AMD P-State driver supports preferred core featue. - * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value @@ -94,7 +93,6 @@ struct amd_cpudata { bool hw_prefcore; /* EPP feature related attributes*/ - s16 epp_policy; s16 epp_cached; u32 policy; u64 cppc_cap1_cached; From 88a95ba066a962d4d39c6a36b18bf665f51d3767 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:39 -0600 Subject: [PATCH 12/21] cpufreq/amd-pstate: Use FIELD_PREP and FIELD_GET macros The FIELD_PREP and FIELD_GET macros improve readability and help to avoid shifting bugs. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-7-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 51 ++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 0e77584dfade..fbd1b36846c5 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -88,6 +89,11 @@ static bool cppc_enabled; static bool amd_pstate_prefcore = true; static struct quirk_entry *quirks; +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) + /* * AMD Energy Preference Performance (EPP) * The EPP is used in the CCLK DPM controller to drive @@ -182,7 +188,6 @@ static DEFINE_MUTEX(amd_pstate_driver_lock); static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) { - u64 epp; int ret; if (!cppc_req_cached) { @@ -192,9 +197,8 @@ static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) return ret; } } - epp = (cppc_req_cached >> 24) & 0xFF; - return (s16)epp; + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cppc_req_cached); } DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); @@ -269,12 +273,11 @@ static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) { + u64 value = READ_ONCE(cpudata->cppc_req_cached); int ret; - u64 value = READ_ONCE(cpudata->cppc_req_cached); - - value &= ~GENMASK_ULL(31, 24); - value |= (u64)epp << 24; + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); @@ -327,8 +330,8 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, - AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), - AMD_CPPC_MAX_PERF(cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), cpudata->boost_state); } @@ -542,18 +545,15 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, des_perf = 0; } - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(des_perf); - /* limit the max perf when core performance boost feature is disabled */ if (!cpudata->boost_supported) max_perf = min_t(unsigned long, nominal_perf, max_perf); - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, @@ -1573,16 +1573,11 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) min_perf = min(cpudata->nominal_perf, max_perf); - /* Initial min/max values for CPPC Performance Controls Register */ - value &= ~AMD_CPPC_MIN_PERF(~0L); - value |= AMD_CPPC_MIN_PERF(min_perf); - - value &= ~AMD_CPPC_MAX_PERF(~0L); - value |= AMD_CPPC_MAX_PERF(max_perf); - - /* CPPC EPP feature require to set zero to the desire perf bit */ - value &= ~AMD_CPPC_DES_PERF(~0L); - value |= AMD_CPPC_DES_PERF(0); + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); @@ -1652,7 +1647,7 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, cpudata->epp_cached, - AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), max_perf, cpudata->boost_state); } From 474e7218e81e7932ed18f91969b72169005ff038 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:40 -0600 Subject: [PATCH 13/21] cpufreq/amd-pstate: Only update the cached value in msr_set_epp() on success If writing the MSR MSR_AMD_CPPC_REQ fails then the cached value in the amd_cpudata structure should not be updated. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-8-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index fbd1b36846c5..ebfc9e20b6cb 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -278,11 +278,15 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); - WRITE_ONCE(cpudata->cppc_req_cached, value); ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); - if (!ret) - cpudata->epp_cached = epp; + if (ret) { + pr_err("failed to set energy perf value (%d)\n", ret); + return ret; + } + + cpudata->epp_cached = epp; + WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; } From 68cb0e77b6439fea64c6907c563b7bd27f2ee57f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:41 -0600 Subject: [PATCH 14/21] cpufreq/amd-pstate: store all values in cpudata struct in khz Storing values in the cpudata structure in different units leads to confusion and hardcoded conversions elsewhere. After ratios are calculated store everything in khz for any future use. Adjust all relevant consumers for this change as well. Suggested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-9-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 12 +++++------- drivers/cpufreq/amd-pstate.c | 28 ++++++++++++++-------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index a261d7300951..3a0a380c3590 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -207,7 +207,6 @@ static void amd_pstate_ut_check_freq(u32 index) int cpu = 0; struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata = NULL; - u32 nominal_freq_khz; for_each_possible_cpu(cpu) { policy = cpufreq_cpu_get(cpu); @@ -215,14 +214,13 @@ static void amd_pstate_ut_check_freq(u32 index) break; cpudata = policy->driver_data; - nominal_freq_khz = cpudata->nominal_freq*1000; - if (!((cpudata->max_freq >= nominal_freq_khz) && - (nominal_freq_khz > cpudata->lowest_nonlinear_freq) && + if (!((cpudata->max_freq >= cpudata->nominal_freq) && + (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && (cpudata->min_freq > 0))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", - __func__, cpu, cpudata->max_freq, nominal_freq_khz, + __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, cpudata->lowest_nonlinear_freq, cpudata->min_freq); goto skip_test; } @@ -236,13 +234,13 @@ static void amd_pstate_ut_check_freq(u32 index) if (cpudata->boost_supported) { if ((policy->max == cpudata->max_freq) || - (policy->max == nominal_freq_khz)) + (policy->max == cpudata->nominal_freq)) amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; else { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", __func__, cpu, policy->max, cpudata->max_freq, - nominal_freq_khz); + cpudata->nominal_freq); goto skip_test; } } else { diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ebfc9e20b6cb..bf2512e1f117 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -739,8 +739,8 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) if (on) policy->cpuinfo.max_freq = max_freq; - else if (policy->cpuinfo.max_freq > nominal_freq * 1000) - policy->cpuinfo.max_freq = nominal_freq * 1000; + else if (policy->cpuinfo.max_freq > nominal_freq) + policy->cpuinfo.max_freq = nominal_freq; policy->max = policy->cpuinfo.max_freq; @@ -940,29 +940,29 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) return ret; if (quirks && quirks->lowest_freq) - min_freq = quirks->lowest_freq * 1000; + min_freq = quirks->lowest_freq; else - min_freq = cppc_perf.lowest_freq * 1000; + min_freq = cppc_perf.lowest_freq; if (quirks && quirks->nominal_freq) - nominal_freq = quirks->nominal_freq ; + nominal_freq = quirks->nominal_freq; else nominal_freq = cppc_perf.nominal_freq; nominal_perf = READ_ONCE(cpudata->nominal_perf); boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); - max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, nominal_perf); - lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT); - WRITE_ONCE(cpudata->min_freq, min_freq); - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); - WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - WRITE_ONCE(cpudata->max_freq, max_freq); + WRITE_ONCE(cpudata->min_freq, min_freq * 1000); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); + WRITE_ONCE(cpudata->max_freq, max_freq * 1000); /** * Below values need to be initialized correctly, otherwise driver will fail to load @@ -972,13 +972,13 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) */ if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) { pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n", - min_freq, max_freq, nominal_freq * 1000); + min_freq, max_freq, nominal_freq); return -EINVAL; } - if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) { + if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) { pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n", - lowest_nonlinear_freq, min_freq, nominal_freq * 1000); + lowest_nonlinear_freq, min_freq, nominal_freq); return -EINVAL; } From 942718f2a236cb3b27d2dbb5942538681b6e0e88 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:42 -0600 Subject: [PATCH 15/21] cpufreq/amd-pstate: Change amd_pstate_update_perf() to return an int As msr_update_perf() calls an MSR it's possible that it fails. Pass this return code up to the caller. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-10-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index bf2512e1f117..d279ace500d7 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -251,24 +251,26 @@ static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) return index; } -static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, +static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { - if (fast_switch) + if (fast_switch) { wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); - else - wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); + return 0; + } + + return wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + READ_ONCE(cpudata->cppc_req_cached)); } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); -static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata, +static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { - static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + max_perf, fast_switch); } static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) @@ -480,7 +482,7 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static void shmem_update_perf(struct amd_cpudata *cpudata, +static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { @@ -490,7 +492,7 @@ static void shmem_update_perf(struct amd_cpudata *cpudata, perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - cppc_set_perf(cpudata->cpu, &perf_ctrls); + return cppc_set_perf(cpudata->cpu, &perf_ctrls); } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) From 3f7b835fa4d0d06f82249a3aca989fdf9bdf4656 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:43 -0600 Subject: [PATCH 16/21] cpufreq/amd-pstate: Move limit updating code The limit updating code in amd_pstate_epp_update_limit() should not only apply to EPP updates. Move it to amd_pstate_update_min_max_limit() so other callers can benefit as well. With this move it's not necessary to have clamp_t calls anymore because the verify callback is called when setting limits. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-11-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d279ace500d7..55529e32d325 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); u64 value = prev; - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); max_freq = READ_ONCE(cpudata->max_limit_freq); @@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; + u32 max_limit_perf, min_limit_perf, max_perf, max_freq; struct amd_cpudata *cpudata = policy->driver_data; max_perf = READ_ONCE(cpudata->highest_perf); @@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * max_perf, max_freq); min_limit_perf = div_u64(policy->min * max_perf, max_freq); - lowest_perf = READ_ONCE(cpudata->lowest_perf); - if (min_limit_perf < lowest_perf) - min_limit_perf = lowest_perf; - - if (max_limit_perf < min_limit_perf) - max_limit_perf = min_limit_perf; + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u32 max_perf, min_perf; u64 value; s16 epp; - max_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); amd_pstate_update_min_max_limit(policy); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); value = READ_ONCE(cpudata->cppc_req_cached); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - min_perf = min(cpudata->nominal_perf, max_perf); - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | AMD_CPPC_DES_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value); From b3781f30bfcfd7db12de2595adb01779e565e1c6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:44 -0600 Subject: [PATCH 17/21] cpufreq/amd-pstate: Cache EPP value and use that everywhere Cache the value in cpudata->epp_cached, and use that for all callers. As all callers use cached value merge amd_pstate_get_energy_pref_index() into show_energy_performance_preference(). Check if the EPP value is changed before writing it to MSR or shared memory region. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-12-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 105 ++++++++++++++--------------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 55529e32d325..d1f82e4ca9b1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -186,29 +186,28 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 msr_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static s16 msr_get_epp(struct amd_cpudata *cpudata) { + u64 value; int ret; - if (!cppc_req_cached) { - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req_cached); - if (ret < 0) { - pr_debug("Could not retrieve energy perf value (%d)\n", ret); - return ret; - } + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return ret; } - return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cppc_req_cached); + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value); } DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp); -static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) { - return static_call(amd_pstate_get_epp)(cpudata, cppc_req_cached); + return static_call(amd_pstate_get_epp)(cpudata); } -static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) +static s16 shmem_get_epp(struct amd_cpudata *cpudata) { u64 epp; int ret; @@ -222,35 +221,6 @@ static s16 shmem_get_epp(struct amd_cpudata *cpudata, u64 dummy) return (s16)(epp & 0xff); } -static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) -{ - s16 epp; - int index = -EINVAL; - - epp = amd_pstate_get_epp(cpudata, 0); - if (epp < 0) - return epp; - - switch (epp) { - case AMD_CPPC_EPP_PERFORMANCE: - index = EPP_INDEX_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_PERFORMANCE: - index = EPP_INDEX_BALANCE_PERFORMANCE; - break; - case AMD_CPPC_EPP_BALANCE_POWERSAVE: - index = EPP_INDEX_BALANCE_POWERSAVE; - break; - case AMD_CPPC_EPP_POWERSAVE: - index = EPP_INDEX_POWERSAVE; - break; - default: - break; - } - - return index; -} - static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, u32 max_perf, bool fast_switch) { @@ -275,19 +245,23 @@ static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) { - u64 value = READ_ONCE(cpudata->cppc_req_cached); + u64 value, prev; int ret; + value = prev = READ_ONCE(cpudata->cppc_req_cached); value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (value == prev) + return 0; + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); if (ret) { pr_err("failed to set energy perf value (%d)\n", ret); return ret; } - cpudata->epp_cached = epp; + WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; @@ -305,13 +279,16 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) int ret; struct cppc_perf_ctrls perf_ctrls; + if (epp == cpudata->epp_cached) + return 0; + perf_ctrls.energy_perf = epp; ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); if (ret) { pr_debug("failed to set energy perf value (%d)\n", ret); return ret; } - cpudata->epp_cached = epp; + WRITE_ONCE(cpudata->epp_cached, epp); return ret; } @@ -1214,9 +1191,22 @@ static ssize_t show_energy_performance_preference( struct amd_cpudata *cpudata = policy->driver_data; int preference; - preference = amd_pstate_get_energy_pref_index(cpudata); - if (preference < 0) - return preference; + switch (cpudata->epp_cached) { + case AMD_CPPC_EPP_PERFORMANCE: + preference = EPP_INDEX_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_PERFORMANCE: + preference = EPP_INDEX_BALANCE_PERFORMANCE; + break; + case AMD_CPPC_EPP_BALANCE_POWERSAVE: + preference = EPP_INDEX_BALANCE_POWERSAVE; + break; + case AMD_CPPC_EPP_POWERSAVE: + preference = EPP_INDEX_POWERSAVE; + break; + default: + return -EINVAL; + } return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); } @@ -1501,7 +1491,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; - cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0); + cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata); policy->min = policy->cpuinfo.min_freq; policy->max = policy->cpuinfo.max_freq; @@ -1555,35 +1545,26 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; u64 value; - s16 epp; amd_pstate_update_min_max_limit(policy); value = READ_ONCE(cpudata->cppc_req_cached); value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | - AMD_CPPC_DES_PERF_MASK); + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); - /* Get BIOS pre-defined epp value */ - epp = amd_pstate_get_epp(cpudata, value); - if (epp < 0) { - /** - * This return value can only be negative for shared_memory - * systems where EPP register read/write not supported. - */ - return epp; - } - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - epp = 0; + WRITE_ONCE(cpudata->epp_cached, 0); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, cpudata->epp_cached); WRITE_ONCE(cpudata->cppc_req_cached, value); if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + cpudata->epp_cached, cpudata->min_limit_perf, cpudata->max_limit_perf, policy->boost_enabled); @@ -1592,7 +1573,7 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, cpudata->max_limit_perf, false); - return amd_pstate_set_epp(cpudata, epp); + return amd_pstate_set_epp(cpudata, READ_ONCE(cpudata->epp_cached)); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) From fff395796917ac3fe3b4c4607cb74a8dbdc17593 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:45 -0600 Subject: [PATCH 18/21] cpufreq/amd-pstate: Always write EPP value when updating perf For MSR systems the EPP value is in the same register as perf targets and so divding them into two separate MSR writes is wasteful. In msr_update_perf(), update both EPP and perf values in one write to MSR_AMD_CPPC_REQ, and cache them if successful. To accomplish this plumb the EPP value into the update_perf call and modify all its callers to check the return value. As this unifies calls, ensure that the MSR write is necessary before flushing a write out. Also drop the comparison from the passive flow tracing. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-13-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 7 +- drivers/cpufreq/amd-pstate.c | 110 +++++++++++++++-------------- 2 files changed, 57 insertions(+), 60 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index e2221a4b6901..8d692415d905 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -32,7 +32,6 @@ TRACE_EVENT(amd_pstate_perf, u64 aperf, u64 tsc, unsigned int cpu_id, - bool changed, bool fast_switch ), @@ -44,7 +43,6 @@ TRACE_EVENT(amd_pstate_perf, aperf, tsc, cpu_id, - changed, fast_switch ), @@ -57,7 +55,6 @@ TRACE_EVENT(amd_pstate_perf, __field(unsigned long long, aperf) __field(unsigned long long, tsc) __field(unsigned int, cpu_id) - __field(bool, changed) __field(bool, fast_switch) ), @@ -70,11 +67,10 @@ TRACE_EVENT(amd_pstate_perf, __entry->aperf = aperf; __entry->tsc = tsc; __entry->cpu_id = cpu_id; - __entry->changed = changed; __entry->fast_switch = fast_switch; ), - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s", + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s", (unsigned long)__entry->min_perf, (unsigned long)__entry->target_perf, (unsigned long)__entry->capacity, @@ -83,7 +79,6 @@ TRACE_EVENT(amd_pstate_perf, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, (unsigned int)__entry->cpu_id, - (__entry->changed) ? "true" : "false", (__entry->fast_switch) ? "true" : "false" ) ); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d1f82e4ca9b1..419790e52d91 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -222,25 +222,47 @@ static s16 shmem_get_epp(struct amd_cpudata *cpudata) } static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { - if (fast_switch) { - wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached)); + u64 value, prev; + + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (value == prev) return 0; + + if (fast_switch) { + wrmsrl(MSR_AMD_CPPC_REQ, value); + return 0; + } else { + int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + + if (ret) + return ret; } - return wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, - READ_ONCE(cpudata->cppc_req_cached)); + WRITE_ONCE(cpudata->cppc_req_cached, value); + WRITE_ONCE(cpudata->epp_cached, epp); + + return 0; } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) + u32 max_perf, u32 epp, + bool fast_switch) { return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, - max_perf, fast_switch); + max_perf, epp, fast_switch); } static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) @@ -261,6 +283,7 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) return ret; } + /* update both so that msr_update_perf() can effectively check */ WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); @@ -459,12 +482,18 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static int shmem_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, bool fast_switch) +static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, + u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) { struct cppc_perf_ctrls perf_ctrls; + if (cppc_state == AMD_PSTATE_ACTIVE) { + int ret = shmem_set_epp(cpudata, epp); + + if (ret) + return ret; + } + perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; @@ -510,9 +539,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, { unsigned long max_freq; struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); - u64 prev = READ_ONCE(cpudata->cppc_req_cached); u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); - u64 value = prev; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); @@ -528,27 +555,14 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, if (!cpudata->boost_supported) max_perf = min_t(unsigned long, nominal_perf, max_perf); - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | - AMD_CPPC_DES_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); - value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); - if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, - cpudata->cpu, (value != prev), fast_switch); + cpudata->cpu, fast_switch); } - if (value == prev) - goto cpufreq_policy_put; + amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); - WRITE_ONCE(cpudata->cppc_req_cached, value); - - amd_pstate_update_perf(cpudata, min_perf, des_perf, - max_perf, fast_switch); - -cpufreq_policy_put: cpufreq_cpu_put(policy); } @@ -1544,36 +1558,24 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u64 value; + u32 epp; amd_pstate_update_min_max_limit(policy); - value = READ_ONCE(cpudata->cppc_req_cached); - - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | - AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); - value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - WRITE_ONCE(cpudata->epp_cached, 0); - value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, cpudata->epp_cached); - - WRITE_ONCE(cpudata->cppc_req_cached, value); + epp = 0; + else + epp = READ_ONCE(cpudata->epp_cached); if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, - cpudata->epp_cached, + trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, cpudata->min_limit_perf, cpudata->max_limit_perf, policy->boost_enabled); } - amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, false); - - return amd_pstate_set_epp(cpudata, READ_ONCE(cpudata->epp_cached)); + return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, + cpudata->max_limit_perf, epp, false); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) @@ -1602,7 +1604,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) +static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata) { u64 max_perf; int ret; @@ -1620,17 +1622,19 @@ static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata) max_perf, cpudata->boost_state); } - amd_pstate_update_perf(cpudata, 0, 0, max_perf, false); - amd_pstate_set_epp(cpudata, cpudata->epp_cached); + return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false); } static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + int ret; pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - amd_pstate_epp_reenable(cpudata); + ret = amd_pstate_epp_reenable(cpudata); + if (ret) + return ret; cpudata->suspended = false; return 0; @@ -1654,10 +1658,8 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) min_perf, min_perf, policy->boost_enabled); } - amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, false); - amd_pstate_set_epp(cpudata, AMD_CPPC_EPP_BALANCE_POWERSAVE); - - return 0; + return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, + AMD_CPPC_EPP_BALANCE_POWERSAVE, false); } static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) From f8fde687c911a366a6132aed85f4ee6b647b9160 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:46 -0600 Subject: [PATCH 19/21] cpufreq/amd-pstate: Drop ret variable from amd_pstate_set_energy_pref_index() The ret variable is not necessary. Reviewed-and-tested-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-14-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 419790e52d91..4d665d9c76d3 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -319,13 +319,11 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, int pref_index) { - int epp = -EINVAL; - int ret; + int epp; if (!pref_index) epp = cpudata->epp_default; - - if (epp == -EINVAL) + else epp = epp_values[pref_index]; if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -341,9 +339,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, cpudata->boost_state); } - ret = amd_pstate_set_epp(cpudata, epp); - - return ret; + return amd_pstate_set_epp(cpudata, epp); } static inline int msr_cppc_enable(bool enable) From f9a378ff6443cdcd4387e5dbb76fa5fa549a83ec Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:47 -0600 Subject: [PATCH 20/21] cpufreq/amd-pstate: Set different default EPP policy for Epyc and Ryzen For Ryzen systems the EPP policy set by the BIOS is generally configured to performance as this is the default register value for the CPPC request MSR. If a user doesn't use additional software to configure EPP then the system will default biased towards performance and consume extra battery. Instead configure the default to "balanced_performance" for this case. Suggested-by: Artem S. Tashkinov Reviewed-by: Dhananjay Ugwekar Tested-by: Dhananjay Ugwekar Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219526 Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-15-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 4d665d9c76d3..97aee213821d 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1501,8 +1501,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->driver_data = cpudata; - cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata); - policy->min = policy->cpuinfo.min_freq; policy->max = policy->cpuinfo.max_freq; @@ -1513,10 +1511,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) * the default cpufreq governor is neither powersave nor performance. */ if (amd_pstate_acpi_pm_profile_server() || - amd_pstate_acpi_pm_profile_undefined()) + amd_pstate_acpi_pm_profile_undefined()) { policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else + cpudata->epp_default = amd_pstate_get_epp(cpudata); + } else { policy->policy = CPUFREQ_POLICY_POWERSAVE; + cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE; + } if (cpu_feature_enabled(X86_FEATURE_CPPC)) { ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); @@ -1529,6 +1530,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return ret; WRITE_ONCE(cpudata->cppc_cap1_cached, value); } + ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); + if (ret) + return ret; current_pstate_driver->adjust_perf = NULL; From 95fad7fb58cfaa2a295aa54a1f001a16b9324963 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:52:48 -0600 Subject: [PATCH 21/21] cpufreq/amd-pstate: Drop boost_state variable Currently boost_state is cached for every processor in cpudata structure and driver boost state is set for every processor. Both of these aren't necessary as the driver only needs to set once and the policy stores whether boost is enabled. Move the driver boost setting to registration and adjust all references to cached value to pull from the policy instead. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20241209185248.16301-16-mario.limonciello@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 26 +++++++++++++------------- drivers/cpufreq/amd-pstate.h | 1 - 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 97aee213821d..d7b1de97727a 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -316,9 +316,10 @@ static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) return ret; } -static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, - int pref_index) +static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, + int pref_index) { + struct amd_cpudata *cpudata = policy->driver_data; int epp; if (!pref_index) @@ -336,7 +337,7 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, epp, FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), - cpudata->boost_state); + policy->boost_enabled); } return amd_pstate_set_epp(cpudata, epp); @@ -746,7 +747,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_cpu_boost_update(policy, state); - WRITE_ONCE(cpudata->boost_state, !ret ? state : false); policy->boost_enabled = !ret ? state : false; refresh_frequency_limits(policy); @@ -768,9 +768,6 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata) goto exit_err; } - /* at least one CPU supports CPB, even if others fail later on to set up */ - current_pstate_driver->boost_enabled = true; - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val); if (ret) { pr_err_once("failed to read initial CPU boost state!\n"); @@ -1176,7 +1173,6 @@ static ssize_t show_energy_performance_available_preferences( static ssize_t store_energy_performance_preference( struct cpufreq_policy *policy, const char *buf, size_t count) { - struct amd_cpudata *cpudata = policy->driver_data; char str_preference[21]; ssize_t ret; @@ -1190,7 +1186,7 @@ static ssize_t store_energy_performance_preference( guard(mutex)(&amd_pstate_limits_lock); - ret = amd_pstate_set_energy_pref_index(cpudata, ret); + ret = amd_pstate_set_energy_pref_index(policy, ret); return ret ? ret : count; } @@ -1265,6 +1261,9 @@ static int amd_pstate_register_driver(int mode) return ret; } + /* at least one CPU supports CPB */ + current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); + ret = cpufreq_register_driver(current_pstate_driver); if (ret) { amd_pstate_driver_cleanup(); @@ -1604,8 +1603,9 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata) +static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) { + struct amd_cpudata *cpudata = policy->driver_data; u64 max_perf; int ret; @@ -1619,7 +1619,7 @@ static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata) trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, cpudata->epp_cached, FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - max_perf, cpudata->boost_state); + max_perf, policy->boost_enabled); } return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false); @@ -1632,7 +1632,7 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - ret = amd_pstate_epp_reenable(cpudata); + ret = amd_pstate_epp_reenable(policy); if (ret) return ret; cpudata->suspended = false; @@ -1690,7 +1690,7 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) guard(mutex)(&amd_pstate_limits_lock); /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(cpudata); + amd_pstate_epp_reenable(policy); cpudata->suspended = false; } diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 7765c82f975c..9747e3be6cee 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -98,7 +98,6 @@ struct amd_cpudata { u64 cppc_cap1_cached; bool suspended; s16 epp_default; - bool boost_state; }; /*