From 6ceb877d5cecd5417d63239bf833a1cd5f8f271c Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:14 +0000 Subject: [PATCH 01/29] cpufreq/amd-pstate: Modify the min_perf calculation in adjust_perf callback Instead of setting a fixed floor at lowest_nonlinear_perf, use the min_limit_perf value, so that it gives the user the freedom to lower the floor further. There are two minimum frequency/perf limits that we need to consider in the adjust_perf callback. One provided by schedutil i.e. the sg_cpu->bw_min value passed in _min_perf arg, another is the effective value of min_freq_qos request that is updated in cpudata->min_limit_perf. Modify the code to use the bigger of these two values. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-4-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 313550fa62d4..17595a2454e1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -672,7 +672,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long capacity) { unsigned long max_perf, min_perf, des_perf, - cap_perf, lowest_nonlinear_perf; + cap_perf, min_limit_perf; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata; @@ -684,20 +684,20 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); - cap_perf = READ_ONCE(cpudata->highest_perf); - lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + min_limit_perf = READ_ONCE(cpudata->min_limit_perf); des_perf = cap_perf; if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); + else + min_perf = cap_perf; - if (min_perf < lowest_nonlinear_perf) - min_perf = lowest_nonlinear_perf; + if (min_perf < min_limit_perf) + min_perf = min_limit_perf; max_perf = cpudata->max_limit_perf; if (max_perf < min_perf) From 932da6489669da4d61b711c44af208fa431654fa Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:15 +0000 Subject: [PATCH 02/29] cpufreq/amd-pstate: Remove the redundant des_perf clamping in adjust_perf des_perf is later on clamped between min_perf and max_perf in amd_pstate_update. So, remove the redundant clamping from amd_pstate_adjust_perf. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-5-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 17595a2454e1..0cf24dff55e9 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -703,8 +703,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (max_perf < min_perf) max_perf = min_perf; - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); cpufreq_cpu_put(policy); From e9869c836b2a460c48e2d69ae79d786303dbffda Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:16 +0000 Subject: [PATCH 03/29] cpufreq/amd-pstate: Pass min/max_limit_perf as min/max_perf to amd_pstate_update Currently, amd_pstate_update_freq passes the hardware perf limits as min/max_perf to amd_pstate_update, which eventually gets programmed into the min/max_perf fields of the CPPC_REQ register. Instead pass the effective perf limits i.e. min/max_limit_perf values to amd_pstate_update as min/max_perf. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-6-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 0cf24dff55e9..3cb81b826bcb 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -615,7 +615,7 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, { struct cpufreq_freqs freqs; struct amd_cpudata *cpudata = policy->driver_data; - unsigned long max_perf, min_perf, des_perf, cap_perf; + unsigned long des_perf, cap_perf; if (!cpudata->max_freq) return -ENODEV; @@ -624,8 +624,6 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, amd_pstate_update_min_max_limit(policy); cap_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); - max_perf = cap_perf; freqs.old = policy->cur; freqs.new = target_freq; @@ -642,8 +640,9 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, if (!fast_switch) cpufreq_freq_transition_begin(policy, &freqs); - amd_pstate_update(cpudata, min_perf, des_perf, - max_perf, fast_switch, policy->governor->flags); + amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf, + cpudata->max_limit_perf, fast_switch, + policy->governor->flags); if (!fast_switch) cpufreq_freq_transition_end(policy, &freqs, false); From 555bbe67a622b297405e246d1868dbda3284bde8 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:17 +0000 Subject: [PATCH 04/29] cpufreq/amd-pstate: Convert all perf values to u8 All perf values are always within 0-255 range, hence convert their datatype to u8 everywhere. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-7-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 46 +++++++++++------------ drivers/cpufreq/amd-pstate.c | 60 +++++++++++++++--------------- drivers/cpufreq/amd-pstate.h | 18 ++++----- 3 files changed, 62 insertions(+), 62 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index 8d692415d905..f457d4af2c62 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -24,9 +24,9 @@ TRACE_EVENT(amd_pstate_perf, - TP_PROTO(unsigned long min_perf, - unsigned long target_perf, - unsigned long capacity, + TP_PROTO(u8 min_perf, + u8 target_perf, + u8 capacity, u64 freq, u64 mperf, u64 aperf, @@ -47,9 +47,9 @@ TRACE_EVENT(amd_pstate_perf, ), TP_STRUCT__entry( - __field(unsigned long, min_perf) - __field(unsigned long, target_perf) - __field(unsigned long, capacity) + __field(u8, min_perf) + __field(u8, target_perf) + __field(u8, capacity) __field(unsigned long long, freq) __field(unsigned long long, mperf) __field(unsigned long long, aperf) @@ -70,10 +70,10 @@ TRACE_EVENT(amd_pstate_perf, __entry->fast_switch = fast_switch; ), - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s", - (unsigned long)__entry->min_perf, - (unsigned long)__entry->target_perf, - (unsigned long)__entry->capacity, + TP_printk("amd_min_perf=%hhu amd_des_perf=%hhu amd_max_perf=%hhu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s", + (u8)__entry->min_perf, + (u8)__entry->target_perf, + (u8)__entry->capacity, (unsigned long long)__entry->freq, (unsigned long long)__entry->mperf, (unsigned long long)__entry->aperf, @@ -86,10 +86,10 @@ TRACE_EVENT(amd_pstate_perf, TRACE_EVENT(amd_pstate_epp_perf, TP_PROTO(unsigned int cpu_id, - unsigned int highest_perf, - unsigned int epp, - unsigned int min_perf, - unsigned int max_perf, + u8 highest_perf, + u8 epp, + u8 min_perf, + u8 max_perf, bool boost ), @@ -102,10 +102,10 @@ TRACE_EVENT(amd_pstate_epp_perf, TP_STRUCT__entry( __field(unsigned int, cpu_id) - __field(unsigned int, highest_perf) - __field(unsigned int, epp) - __field(unsigned int, min_perf) - __field(unsigned int, max_perf) + __field(u8, highest_perf) + __field(u8, epp) + __field(u8, min_perf) + __field(u8, max_perf) __field(bool, boost) ), @@ -118,12 +118,12 @@ TRACE_EVENT(amd_pstate_epp_perf, __entry->boost = boost; ), - TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u", + TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u", (unsigned int)__entry->cpu_id, - (unsigned int)__entry->min_perf, - (unsigned int)__entry->max_perf, - (unsigned int)__entry->highest_perf, - (unsigned int)__entry->epp, + (u8)__entry->min_perf, + (u8)__entry->max_perf, + (u8)__entry->highest_perf, + (u8)__entry->epp, (bool)__entry->boost ) ); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 3cb81b826bcb..69409d43eae8 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -186,7 +186,7 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); -static s16 msr_get_epp(struct amd_cpudata *cpudata) +static u8 msr_get_epp(struct amd_cpudata *cpudata) { u64 value; int ret; @@ -207,7 +207,7 @@ static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata) return static_call(amd_pstate_get_epp)(cpudata); } -static s16 shmem_get_epp(struct amd_cpudata *cpudata) +static u8 shmem_get_epp(struct amd_cpudata *cpudata) { u64 epp; int ret; @@ -218,11 +218,11 @@ static s16 shmem_get_epp(struct amd_cpudata *cpudata) return ret; } - return (s16)(epp & 0xff); + return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); } -static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) +static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, + u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { u64 value, prev; @@ -257,15 +257,15 @@ static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf, DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, - u32 min_perf, u32 des_perf, - u32 max_perf, u32 epp, + u8 min_perf, u8 des_perf, + u8 max_perf, u8 epp, bool fast_switch) { return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, max_perf, epp, fast_switch); } -static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) +static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) { u64 value, prev; int ret; @@ -292,12 +292,12 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp) DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); -static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp) { return static_call(amd_pstate_set_epp)(cpudata, epp); } -static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp) +static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp) { int ret; struct cppc_perf_ctrls perf_ctrls; @@ -320,7 +320,7 @@ static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, int pref_index) { struct amd_cpudata *cpudata = policy->driver_data; - int epp; + u8 epp; if (!pref_index) epp = cpudata->epp_default; @@ -479,8 +479,8 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, u32 epp, bool fast_switch) +static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, + u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { struct cppc_perf_ctrls perf_ctrls; @@ -531,14 +531,14 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) return true; } -static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) +static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, + u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) { unsigned long max_freq; struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); - u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); + u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + des_perf = clamp_t(u8, des_perf, min_perf, max_perf); max_freq = READ_ONCE(cpudata->max_limit_freq); policy->cur = div_u64(des_perf * max_freq, max_perf); @@ -550,7 +550,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, /* limit the max perf when core performance boost feature is disabled */ if (!cpudata->boost_supported) - max_perf = min_t(unsigned long, nominal_perf, max_perf); + max_perf = min_t(u8, nominal_perf, max_perf); if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, @@ -591,7 +591,8 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf, max_perf, max_freq; + u8 max_limit_perf, min_limit_perf, max_perf; + u32 max_freq; struct amd_cpudata *cpudata = policy->driver_data; max_perf = READ_ONCE(cpudata->highest_perf); @@ -615,7 +616,7 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, { struct cpufreq_freqs freqs; struct amd_cpudata *cpudata = policy->driver_data; - unsigned long des_perf, cap_perf; + u8 des_perf, cap_perf; if (!cpudata->max_freq) return -ENODEV; @@ -670,8 +671,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long target_perf, unsigned long capacity) { - unsigned long max_perf, min_perf, des_perf, - cap_perf, min_limit_perf; + u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata; @@ -905,8 +905,8 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { int ret; u32 min_freq, max_freq; - u32 highest_perf, nominal_perf, nominal_freq; - u32 lowest_nonlinear_perf, lowest_nonlinear_freq; + u8 highest_perf, nominal_perf, lowest_nonlinear_perf; + u32 nominal_freq, lowest_nonlinear_freq; struct cppc_perf_caps cppc_perf; ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); @@ -1113,7 +1113,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, char *buf) { - u32 perf; + u8 perf; struct amd_cpudata *cpudata = policy->driver_data; perf = READ_ONCE(cpudata->highest_perf); @@ -1124,7 +1124,7 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, char *buf) { - u32 perf; + u8 perf; struct amd_cpudata *cpudata = policy->driver_data; perf = READ_ONCE(cpudata->prefcore_ranking); @@ -1187,7 +1187,7 @@ static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct amd_cpudata *cpudata = policy->driver_data; - int preference; + u8 preference; switch (cpudata->epp_cached) { case AMD_CPPC_EPP_PERFORMANCE: @@ -1549,7 +1549,7 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u32 epp; + u8 epp; amd_pstate_update_min_max_limit(policy); @@ -1598,7 +1598,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u64 max_perf; + u8 max_perf; int ret; ret = amd_pstate_cppc_enable(true); @@ -1635,7 +1635,7 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - int min_perf; + u8 min_perf; if (cpudata->suspended) return 0; diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 9747e3be6cee..19d405c6d805 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -70,13 +70,13 @@ struct amd_cpudata { struct freq_qos_request req[2]; u64 cppc_req_cached; - u32 highest_perf; - u32 nominal_perf; - u32 lowest_nonlinear_perf; - u32 lowest_perf; - u32 prefcore_ranking; - u32 min_limit_perf; - u32 max_limit_perf; + u8 highest_perf; + u8 nominal_perf; + u8 lowest_nonlinear_perf; + u8 lowest_perf; + u8 prefcore_ranking; + u8 min_limit_perf; + u8 max_limit_perf; u32 min_limit_freq; u32 max_limit_freq; @@ -93,11 +93,11 @@ struct amd_cpudata { bool hw_prefcore; /* EPP feature related attributes*/ - s16 epp_cached; + u8 epp_cached; u32 policy; u64 cppc_cap1_cached; bool suspended; - s16 epp_default; + u8 epp_default; }; /* From 620136ced35a9329f4d1ea90e51bee2dfd7ee5b0 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:18 +0000 Subject: [PATCH 05/29] cpufreq/amd-pstate: Modularize perf<->freq conversion Delegate the perf<->frequency conversion to helper functions to reduce code duplication, and improve readability. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-8-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 57 +++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 69409d43eae8..9ab95ec1f828 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -142,6 +142,20 @@ static struct quirk_entry quirk_amd_7k62 = { .lowest_freq = 550, }; +static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val) +{ + u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, + cpudata->nominal_freq); + + return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf); +} + +static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val) +{ + return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val, + cpudata->nominal_perf); +} + static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) { /** @@ -534,14 +548,12 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) { - unsigned long max_freq; struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); des_perf = clamp_t(u8, des_perf, min_perf, max_perf); - max_freq = READ_ONCE(cpudata->max_limit_freq); - policy->cur = div_u64(des_perf * max_freq, max_perf); + policy->cur = perf_to_freq(cpudata, des_perf); if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; @@ -591,14 +603,11 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u8 max_limit_perf, min_limit_perf, max_perf; - u32 max_freq; + u8 max_limit_perf, min_limit_perf; struct amd_cpudata *cpudata = policy->driver_data; - max_perf = READ_ONCE(cpudata->highest_perf); - max_freq = READ_ONCE(cpudata->max_freq); - max_limit_perf = div_u64(policy->max * max_perf, max_freq); - min_limit_perf = div_u64(policy->min * max_perf, max_freq); + max_limit_perf = freq_to_perf(cpudata, policy->max); + min_limit_perf = freq_to_perf(cpudata, policy->min); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); @@ -616,21 +625,15 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, { struct cpufreq_freqs freqs; struct amd_cpudata *cpudata = policy->driver_data; - u8 des_perf, cap_perf; - - if (!cpudata->max_freq) - return -ENODEV; + u8 des_perf; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); - cap_perf = READ_ONCE(cpudata->highest_perf); - freqs.old = policy->cur; freqs.new = target_freq; - des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf, - cpudata->max_freq); + des_perf = freq_to_perf(cpudata, target_freq); WARN_ON(fast_switch && !policy->fast_switch_enabled); /* @@ -905,7 +908,6 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { int ret; u32 min_freq, max_freq; - u8 highest_perf, nominal_perf, lowest_nonlinear_perf; u32 nominal_freq, lowest_nonlinear_freq; struct cppc_perf_caps cppc_perf; @@ -923,16 +925,17 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) else nominal_freq = cppc_perf.nominal_freq; - highest_perf = READ_ONCE(cpudata->highest_perf); - nominal_perf = READ_ONCE(cpudata->nominal_perf); - max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf); + min_freq *= 1000; + nominal_freq *= 1000; - lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); - lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf); - WRITE_ONCE(cpudata->min_freq, min_freq * 1000); - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000); - WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000); - WRITE_ONCE(cpudata->max_freq, max_freq * 1000); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq); + WRITE_ONCE(cpudata->min_freq, min_freq); + + max_freq = perf_to_freq(cpudata, cpudata->highest_perf); + lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf); + + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); + WRITE_ONCE(cpudata->max_freq, max_freq); /** * Below values need to be initialized correctly, otherwise driver will fail to load From b899434857b0f1ab460b3c126cbed82ab9b52d43 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:19 +0000 Subject: [PATCH 06/29] cpufreq/amd-pstate: Remove the unnecessary cpufreq_update_policy call The update_limits callback is only called in two conditions. * When the preferred core rankings change. In which case, we just need to change the prefcore ranking in the cpudata struct. As there are no changes to any of the perf values, there is no need to call cpufreq_update_policy() * When the _PPC ACPI object changes, i.e. the highest allowed Pstate changes. The _PPC object is only used for a table based cpufreq driver like acpi-cpufreq, hence is irrelevant for CPPC based amd-pstate. Hence, the cpufreq_update_policy() call becomes unnecessary and can be removed. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-9-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9ab95ec1f828..9c939be59042 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -853,10 +853,6 @@ static void amd_pstate_update_limits(unsigned int cpu) sched_set_itmt_core_prio((int)cur_high, cpu); } cpufreq_cpu_put(policy); - - if (!highest_perf_changed) - cpufreq_update_policy(cpu); - } /* From 426db24d4db2e4f0d6720aeb7795eafcb9e82640 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:21 +0000 Subject: [PATCH 07/29] cpufreq/amd-pstate: Add missing NULL ptr check in amd_pstate_update Check if policy is NULL before dereferencing it in amd_pstate_update. Fixes: e8f555daacd3 ("cpufreq/amd-pstate: fix setting policy current frequency value") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-11-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9c939be59042..6a604f0797d9 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -551,6 +551,9 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); + if (!policy) + return; + des_perf = clamp_t(u8, des_perf, min_perf, max_perf); policy->cur = perf_to_freq(cpudata, des_perf); From 97a705dc1a3654d8d2e466433a897be202a7f0ac Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:22 +0000 Subject: [PATCH 08/29] cpufreq/amd-pstate: Use scope based cleanup for cpufreq_policy refs There have been instances in past where refcount decrementing is missed while exiting a function. Use automatic scope based cleanup to avoid such errors. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-12-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 25 ++++++++----------------- include/linux/cpufreq.h | 3 +++ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 6a604f0797d9..ee7e3f0a4c0a 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -548,7 +548,7 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); if (!policy) @@ -574,8 +574,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, } amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); - - cpufreq_cpu_put(policy); } static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) @@ -587,7 +585,8 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) * amd-pstate qos_requests. */ if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) { - struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = + cpufreq_cpu_get(policy_data->cpu); struct amd_cpudata *cpudata; if (!policy) @@ -595,7 +594,6 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) cpudata = policy->driver_data; policy_data->min = cpudata->lowest_nonlinear_freq; - cpufreq_cpu_put(policy); } cpufreq_verify_within_cpu_limits(policy_data); @@ -678,7 +676,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long capacity) { u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata; if (!policy) @@ -710,7 +708,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu, amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, policy->governor->flags); - cpufreq_cpu_put(policy); } static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) @@ -824,28 +821,23 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) static void amd_pstate_update_limits(unsigned int cpu) { - struct cpufreq_policy *policy = NULL; + struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata; u32 prev_high = 0, cur_high = 0; - int ret; bool highest_perf_changed = false; if (!amd_pstate_prefcore) return; - policy = cpufreq_cpu_get(cpu); if (!policy) return; - cpudata = policy->driver_data; - guard(mutex)(&amd_pstate_driver_lock); - ret = amd_get_highest_perf(cpu, &cur_high); - if (ret) { - cpufreq_cpu_put(policy); + if (amd_get_highest_perf(cpu, &cur_high)) return; - } + + cpudata = policy->driver_data; prev_high = READ_ONCE(cpudata->prefcore_ranking); highest_perf_changed = (prev_high != cur_high); @@ -855,7 +847,6 @@ static void amd_pstate_update_limits(unsigned int cpu) if (cur_high < CPPC_MAX_PERF) sched_set_itmt_core_prio((int)cur_high, cpu); } - cpufreq_cpu_put(policy); } /* diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7fe0981a7e46..dde5212d256c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -210,6 +210,9 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } #endif +/* Scope based cleanup macro for cpufreq_policy kobject reference counting */ +DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T)) + static inline bool policy_is_inactive(struct cpufreq_policy *policy) { return cpumask_empty(policy->cpus); From 3e93edc58a63cf816e6dc853da8e9b0201bd0298 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Wed, 5 Feb 2025 11:25:23 +0000 Subject: [PATCH 09/29] cpufreq/amd-pstate: Remove the unncecessary driver_lock in amd_pstate_update_limits There is no need to take a driver wide lock while updating the highest_perf value in the percpu cpudata struct. Hence remove it. Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20250205112523.201101-13-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ee7e3f0a4c0a..08ae48076812 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -832,8 +832,6 @@ static void amd_pstate_update_limits(unsigned int cpu) if (!policy) return; - guard(mutex)(&amd_pstate_driver_lock); - if (amd_get_highest_perf(cpu, &cur_high)) return; From a1d1d8fb653532638cfb3ee0b7e67ebd5327a3d6 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Sat, 22 Feb 2025 03:32:22 +0000 Subject: [PATCH 10/29] cpufreq/amd-pstate: Fix the clamping of perf values The clamping in freq_to_perf() is broken right now, as we first typecast (read wraparound) the overflowing value into a u8 and then clamp it down. So, use a u32 to store the >255 value in certain edge cases and then clamp it down into a u8. Also, use a "explicit typecast + clamp" instead of just a "clamp_t" as the latter typecasts first and then clamps between the limits, which defeats our purpose. Fixes: 620136ced35a ("cpufreq/amd-pstate: Modularize perf<->freq conversion") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250222033221.554976-1-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 08ae48076812..56930424c9fa 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -144,10 +144,10 @@ static struct quirk_entry quirk_amd_7k62 = { static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val) { - u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, + u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, cpudata->nominal_freq); - return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf); + return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf); } static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val) From b7a41156588ad03757bf0a2f0e05d6cbcebeaa9e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 17 Feb 2025 13:28:51 -0600 Subject: [PATCH 11/29] cpufreq/amd-pstate: Invalidate cppc_req_cached during suspend During resume it's possible the firmware didn't restore the CPPC request MSR but the kernel thinks the values line up. This leads to incorrect performance after resume from suspend. To fix the issue invalidate the cached value at suspend. During resume use the saved values programmed as cached limits. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Reported-by: Miroslav Pavleski Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217931 Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 56930424c9fa..44318eb33463 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1605,7 +1605,7 @@ static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) max_perf, policy->boost_enabled); } - return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false); + return amd_pstate_epp_update_limit(policy); } static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) @@ -1654,6 +1654,9 @@ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) if (cppc_state != AMD_PSTATE_ACTIVE) return 0; + /* invalidate to ensure it's rewritten during resume */ + cpudata->cppc_req_cached = 0; + /* set this flag to avoid setting core offline*/ cpudata->suspended = true; From a9ba0fd452d82ca0da170eb6291aac01075a17d5 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 11 Dec 2024 14:57:57 -0600 Subject: [PATCH 12/29] cpufreq/amd-pstate: Show a warning when a CPU fails to setup I came across a system that MSR_AMD_CPPC_CAP1 for some CPUs isn't populated. This is an unexpected behavior that is most likely a BIOS bug. In the event it happens I'd like users to report bugs to properly root cause and get this fixed. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 44318eb33463..29250638a2ac 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1028,6 +1028,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) free_cpudata2: freq_qos_remove_request(&cpudata->req[0]); free_cpudata1: + pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); kfree(cpudata); return ret; } @@ -1521,6 +1522,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return 0; free_cpudata1: + pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret); kfree(cpudata); return ret; } From a9b9b4c2a4cdd00428d14914e3c18be3856aba71 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 23 Jan 2025 16:16:01 -0600 Subject: [PATCH 13/29] cpufreq/amd-pstate: Drop min and max cached frequencies Use the perf_to_freq helpers to calculate this on the fly. As the members are no longer cached add an extra check into amd_pstate_epp_update_limit() to avoid unnecessary calls in amd_pstate_update_min_max_limit(). Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 14 +++++------ drivers/cpufreq/amd-pstate.c | 43 +++++++++------------------------ drivers/cpufreq/amd-pstate.h | 9 ++----- 3 files changed, 20 insertions(+), 46 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 3a0a380c3590..445278cf40b6 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -214,14 +214,14 @@ static void amd_pstate_ut_check_freq(u32 index) break; cpudata = policy->driver_data; - if (!((cpudata->max_freq >= cpudata->nominal_freq) && + if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) && (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && - (cpudata->lowest_nonlinear_freq > cpudata->min_freq) && - (cpudata->min_freq > 0))) { + (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) && + (policy->cpuinfo.min_freq > 0))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", - __func__, cpu, cpudata->max_freq, cpudata->nominal_freq, - cpudata->lowest_nonlinear_freq, cpudata->min_freq); + __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq, + cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq); goto skip_test; } @@ -233,13 +233,13 @@ static void amd_pstate_ut_check_freq(u32 index) } if (cpudata->boost_supported) { - if ((policy->max == cpudata->max_freq) || + if ((policy->max == policy->cpuinfo.max_freq) || (policy->max == cpudata->nominal_freq)) amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; else { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", - __func__, cpu, policy->max, cpudata->max_freq, + __func__, cpu, policy->max, policy->cpuinfo.max_freq, cpudata->nominal_freq); goto skip_test; } diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 29250638a2ac..ac5a6fc61db7 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -717,7 +717,7 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) int ret = 0; nominal_freq = READ_ONCE(cpudata->nominal_freq); - max_freq = READ_ONCE(cpudata->max_freq); + max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)); if (on) policy->cpuinfo.max_freq = max_freq; @@ -917,13 +917,10 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) nominal_freq *= 1000; WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - WRITE_ONCE(cpudata->min_freq, min_freq); max_freq = perf_to_freq(cpudata, cpudata->highest_perf); lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf); - WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); - WRITE_ONCE(cpudata->max_freq, max_freq); /** * Below values need to be initialized correctly, otherwise driver will fail to load @@ -948,9 +945,9 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, ret; - struct device *dev; struct amd_cpudata *cpudata; + struct device *dev; + int ret; /* * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, @@ -981,17 +978,11 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - min_freq = READ_ONCE(cpudata->min_freq); - max_freq = READ_ONCE(cpudata->max_freq); - policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); - policy->min = min_freq; - policy->max = max_freq; - - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; + policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); policy->boost_enabled = READ_ONCE(cpudata->boost_supported); @@ -1015,9 +1006,6 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata2; } - cpudata->max_limit_freq = max_freq; - cpudata->min_limit_freq = min_freq; - policy->driver_data = cpudata; if (!current_pstate_driver->adjust_perf) @@ -1075,14 +1063,10 @@ static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, char *buf) { - int max_freq; struct amd_cpudata *cpudata = policy->driver_data; - max_freq = READ_ONCE(cpudata->max_freq); - if (max_freq < 0) - return max_freq; - return sysfs_emit(buf, "%u\n", max_freq); + return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf))); } static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, @@ -1440,10 +1424,10 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { - int min_freq, max_freq, ret; struct amd_cpudata *cpudata; struct device *dev; u64 value; + int ret; /* * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, @@ -1474,19 +1458,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - min_freq = READ_ONCE(cpudata->min_freq); - max_freq = READ_ONCE(cpudata->max_freq); - - policy->cpuinfo.min_freq = min_freq; - policy->cpuinfo.max_freq = max_freq; + policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; policy->driver_data = cpudata; - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; - policy->boost_enabled = READ_ONCE(cpudata->boost_supported); /* @@ -1544,7 +1522,8 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; u8 epp; - amd_pstate_update_min_max_limit(policy); + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) epp = 0; diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 19d405c6d805..014993369245 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -46,8 +46,6 @@ struct amd_aperf_mperf { * @max_limit_perf: Cached value of the performance corresponding to policy->max * @min_limit_freq: Cached value of policy->min (in khz) * @max_limit_freq: Cached value of policy->max (in khz) - * @max_freq: the frequency (in khz) that mapped to highest_perf - * @min_freq: the frequency (in khz) that mapped to lowest_perf * @nominal_freq: the frequency (in khz) that mapped to nominal_perf * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf * @cur: Difference of Aperf/Mperf/tsc count between last and current sample @@ -77,11 +75,8 @@ struct amd_cpudata { u8 prefcore_ranking; u8 min_limit_perf; u8 max_limit_perf; - u32 min_limit_freq; - u32 max_limit_freq; - - u32 max_freq; - u32 min_freq; + u32 min_limit_freq; + u32 max_limit_freq; u32 nominal_freq; u32 lowest_nonlinear_freq; From 009d1c29a45194212e9310ccd91a19a673908a5c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 17 Jan 2025 18:34:38 -0600 Subject: [PATCH 14/29] cpufreq/amd-pstate: Move perf values into a union By storing perf values in a union all the writes and reads can be done atomically, removing the need for some concurrency protections. While making this change, also drop the cached frequency values, using inline helpers to calculate them on demand from perf value. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 18 +-- drivers/cpufreq/amd-pstate.c | 213 ++++++++++++++++++-------------- drivers/cpufreq/amd-pstate.h | 51 +++++--- 3 files changed, 162 insertions(+), 120 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 445278cf40b6..5f6a92a816e6 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -129,6 +129,7 @@ static void amd_pstate_ut_check_perf(u32 index) struct cppc_perf_caps cppc_perf; struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata = NULL; + union perf_cached cur_perf; for_each_possible_cpu(cpu) { policy = cpufreq_cpu_get(cpu); @@ -162,19 +163,20 @@ static void amd_pstate_ut_check_perf(u32 index) lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); } - if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) { + cur_perf = READ_ONCE(cpudata->perf); + if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) { pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", - __func__, cpu, highest_perf, cpudata->highest_perf); + __func__, cpu, highest_perf, cur_perf.highest_perf); goto skip_test; } - if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) || - (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) || - (lowest_perf != READ_ONCE(cpudata->lowest_perf))) { + if (nominal_perf != cur_perf.nominal_perf || + (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) || + (lowest_perf != cur_perf.lowest_perf)) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", - __func__, cpu, nominal_perf, cpudata->nominal_perf, - lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf, - lowest_perf, cpudata->lowest_perf); + __func__, cpu, nominal_perf, cur_perf.nominal_perf, + lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf, + lowest_perf, cur_perf.lowest_perf); goto skip_test; } diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ac5a6fc61db7..983c8728701e 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -142,18 +142,17 @@ static struct quirk_entry quirk_amd_7k62 = { .lowest_freq = 550, }; -static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val) +static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val) { - u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf, - cpudata->nominal_freq); + u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq); - return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf); + return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf); } -static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val) +static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val) { - return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val, - cpudata->nominal_perf); + return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val, + perf.nominal_perf); } static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) @@ -347,7 +346,9 @@ static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, } if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp, FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), @@ -425,6 +426,7 @@ static inline int amd_pstate_cppc_enable(bool enable) static int msr_init_perf(struct amd_cpudata *cpudata) { + union perf_cached perf = READ_ONCE(cpudata->perf); u64 cap1, numerator; int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, @@ -436,19 +438,21 @@ static int msr_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, numerator); - WRITE_ONCE(cpudata->max_limit_perf, numerator); - WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); - WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + perf.highest_perf = numerator; + perf.max_limit_perf = numerator; + perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1); + perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); + perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); + perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); + WRITE_ONCE(cpudata->perf, perf); WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); - WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } static int shmem_init_perf(struct amd_cpudata *cpudata) { struct cppc_perf_caps cppc_perf; + union perf_cached perf = READ_ONCE(cpudata->perf); u64 numerator; int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); @@ -459,14 +463,14 @@ static int shmem_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - WRITE_ONCE(cpudata->highest_perf, numerator); - WRITE_ONCE(cpudata->max_limit_perf, numerator); - WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); - WRITE_ONCE(cpudata->lowest_nonlinear_perf, - cppc_perf.lowest_nonlinear_perf); - WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + perf.highest_perf = numerator; + perf.max_limit_perf = numerator; + perf.min_limit_perf = cppc_perf.lowest_perf; + perf.nominal_perf = cppc_perf.nominal_perf; + perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; + perf.lowest_perf = cppc_perf.lowest_perf; + WRITE_ONCE(cpudata->perf, perf); WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); - WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) return 0; @@ -549,14 +553,14 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags) { struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu); - u8 nominal_perf = READ_ONCE(cpudata->nominal_perf); + union perf_cached perf = READ_ONCE(cpudata->perf); if (!policy) return; des_perf = clamp_t(u8, des_perf, min_perf, max_perf); - policy->cur = perf_to_freq(cpudata, des_perf); + policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf); if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { min_perf = des_perf; @@ -565,7 +569,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, /* limit the max perf when core performance boost feature is disabled */ if (!cpudata->boost_supported) - max_perf = min_t(u8, nominal_perf, max_perf); + max_perf = min_t(u8, perf.nominal_perf, max_perf); if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, @@ -602,39 +606,41 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) return 0; } -static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) +static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u8 max_limit_perf, min_limit_perf; struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); - max_limit_perf = freq_to_perf(cpudata, policy->max); - min_limit_perf = freq_to_perf(cpudata, policy->min); + perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); + perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); + perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); WRITE_ONCE(cpudata->max_limit_freq, policy->max); WRITE_ONCE(cpudata->min_limit_freq, policy->min); - - return 0; + WRITE_ONCE(cpudata->perf, perf); } static int amd_pstate_update_freq(struct cpufreq_policy *policy, unsigned int target_freq, bool fast_switch) { struct cpufreq_freqs freqs; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + union perf_cached perf; u8 des_perf; + cpudata = policy->driver_data; + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); + perf = READ_ONCE(cpudata->perf); + freqs.old = policy->cur; freqs.new = target_freq; - des_perf = freq_to_perf(cpudata, target_freq); + des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq); WARN_ON(fast_switch && !policy->fast_switch_enabled); /* @@ -645,8 +651,8 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, if (!fast_switch) cpufreq_freq_transition_begin(policy, &freqs); - amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf, - cpudata->max_limit_perf, fast_switch, + amd_pstate_update(cpudata, perf.min_limit_perf, des_perf, + perf.max_limit_perf, fast_switch, policy->governor->flags); if (!fast_switch) @@ -675,9 +681,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long target_perf, unsigned long capacity) { - u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf; + u8 max_perf, min_perf, des_perf, cap_perf; struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata; + union perf_cached perf; if (!policy) return; @@ -687,8 +694,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); - cap_perf = READ_ONCE(cpudata->highest_perf); - min_limit_perf = READ_ONCE(cpudata->min_limit_perf); + perf = READ_ONCE(cpudata->perf); + cap_perf = perf.highest_perf; des_perf = cap_perf; if (target_perf < capacity) @@ -699,10 +706,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu, else min_perf = cap_perf; - if (min_perf < min_limit_perf) - min_perf = min_limit_perf; + if (min_perf < perf.min_limit_perf) + min_perf = perf.min_limit_perf; - max_perf = cpudata->max_limit_perf; + max_perf = perf.max_limit_perf; if (max_perf < min_perf) max_perf = min_perf; @@ -713,11 +720,12 @@ static void amd_pstate_adjust_perf(unsigned int cpu, static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf = READ_ONCE(cpudata->perf); u32 nominal_freq, max_freq; int ret = 0; nominal_freq = READ_ONCE(cpudata->nominal_freq); - max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)); + max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf); if (on) policy->cpuinfo.max_freq = max_freq; @@ -882,44 +890,44 @@ static u32 amd_pstate_get_transition_latency(unsigned int cpu) } /* - * amd_pstate_init_freq: Initialize the max_freq, min_freq, - * nominal_freq and lowest_nonlinear_freq for - * the @cpudata object. + * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq + * for the @cpudata object. * - * Requires: highest_perf, lowest_perf, nominal_perf and - * lowest_nonlinear_perf members of @cpudata to be - * initialized. + * Requires: all perf members of @cpudata to be initialized. * - * Returns 0 on success, non-zero value on failure. + * Returns 0 on success, non-zero value on failure. */ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) { - int ret; - u32 min_freq, max_freq; - u32 nominal_freq, lowest_nonlinear_freq; + u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq; struct cppc_perf_caps cppc_perf; + union perf_cached perf; + int ret; ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); if (ret) return ret; - - if (quirks && quirks->lowest_freq) - min_freq = quirks->lowest_freq; - else - min_freq = cppc_perf.lowest_freq; + perf = READ_ONCE(cpudata->perf); if (quirks && quirks->nominal_freq) nominal_freq = quirks->nominal_freq; else nominal_freq = cppc_perf.nominal_freq; + nominal_freq *= 1000; + + if (quirks && quirks->lowest_freq) { + min_freq = quirks->lowest_freq; + perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq); + WRITE_ONCE(cpudata->perf, perf); + } else + min_freq = cppc_perf.lowest_freq; min_freq *= 1000; - nominal_freq *= 1000; WRITE_ONCE(cpudata->nominal_freq, nominal_freq); - max_freq = perf_to_freq(cpudata, cpudata->highest_perf); - lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf); + max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf); + lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); /** @@ -946,6 +954,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata; + union perf_cached perf; struct device *dev; int ret; @@ -981,8 +990,14 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); - policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); - policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); + perf = READ_ONCE(cpudata->perf); + + policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, + cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, + cpudata->nominal_freq, + perf.highest_perf); policy->boost_enabled = READ_ONCE(cpudata->boost_supported); @@ -1063,23 +1078,27 @@ static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, char *buf) { - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + union perf_cached perf; + cpudata = policy->driver_data; + perf = READ_ONCE(cpudata->perf); - return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf))); + return sysfs_emit(buf, "%u\n", + perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf)); } static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, char *buf) { - int freq; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; + union perf_cached perf; - freq = READ_ONCE(cpudata->lowest_nonlinear_freq); - if (freq < 0) - return freq; + cpudata = policy->driver_data; + perf = READ_ONCE(cpudata->perf); - return sysfs_emit(buf, "%u\n", freq); + return sysfs_emit(buf, "%u\n", + perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf)); } /* @@ -1089,12 +1108,11 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, char *buf) { - u8 perf; - struct amd_cpudata *cpudata = policy->driver_data; + struct amd_cpudata *cpudata; - perf = READ_ONCE(cpudata->highest_perf); + cpudata = policy->driver_data; - return sysfs_emit(buf, "%u\n", perf); + return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf); } static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, @@ -1425,6 +1443,7 @@ static bool amd_pstate_acpi_pm_profile_undefined(void) static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata; + union perf_cached perf; struct device *dev; u64 value; int ret; @@ -1458,8 +1477,15 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) if (ret) goto free_cpudata1; - policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf); - policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf); + perf = READ_ONCE(cpudata->perf); + + policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, + cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, + cpudata->nominal_freq, + perf.highest_perf); + /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; @@ -1520,6 +1546,7 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; + union perf_cached perf; u8 epp; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) @@ -1530,15 +1557,16 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) else epp = READ_ONCE(cpudata->epp_cached); + perf = READ_ONCE(cpudata->perf); if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp, - cpudata->min_limit_perf, - cpudata->max_limit_perf, + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp, + perf.min_limit_perf, + perf.max_limit_perf, policy->boost_enabled); } - return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U, - cpudata->max_limit_perf, epp, false); + return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U, + perf.max_limit_perf, epp, false); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) @@ -1570,20 +1598,18 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u8 max_perf; + union perf_cached perf = READ_ONCE(cpudata->perf); int ret; ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd pstate during resume, return %d\n", ret); - max_perf = READ_ONCE(cpudata->highest_perf); - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, cpudata->epp_cached, FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - max_perf, policy->boost_enabled); + perf.highest_perf, policy->boost_enabled); } return amd_pstate_epp_update_limit(policy); @@ -1607,22 +1633,21 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u8 min_perf; + union perf_cached perf = READ_ONCE(cpudata->perf); if (cpudata->suspended) return 0; - min_perf = READ_ONCE(cpudata->lowest_perf); - guard(mutex)(&amd_pstate_limits_lock); if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, AMD_CPPC_EPP_BALANCE_POWERSAVE, - min_perf, min_perf, policy->boost_enabled); + perf.lowest_perf, perf.lowest_perf, + policy->boost_enabled); } - return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf, + return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf, AMD_CPPC_EPP_BALANCE_POWERSAVE, false); } diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 014993369245..83532a0079a8 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -13,6 +13,36 @@ /********************************************************************* * AMD P-state INTERFACE * *********************************************************************/ + +/** + * union perf_cached - A union to cache performance-related data. + * @highest_perf: the maximum performance an individual processor may reach, + * assuming ideal conditions + * For platforms that support the preferred core feature, the highest_perf value maybe + * configured to any value in the range 166-255 by the firmware (because the preferred + * core ranking is encoded in the highest_perf value). To maintain consistency across + * all platforms, we split the highest_perf and preferred core ranking values into + * cpudata->perf.highest_perf and cpudata->prefcore_ranking. + * @nominal_perf: the maximum sustained performance level of the processor, + * assuming ideal operating conditions + * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power + * savings are achieved + * @lowest_perf: the absolute lowest performance level of the processor + * @min_limit_perf: Cached value of the performance corresponding to policy->min + * @max_limit_perf: Cached value of the performance corresponding to policy->max + */ +union perf_cached { + struct { + u8 highest_perf; + u8 nominal_perf; + u8 lowest_nonlinear_perf; + u8 lowest_perf; + u8 min_limit_perf; + u8 max_limit_perf; + }; + u64 val; +}; + /** * struct amd_aperf_mperf * @aperf: actual performance frequency clock count @@ -30,20 +60,9 @@ struct amd_aperf_mperf { * @cpu: CPU number * @req: constraint request to apply * @cppc_req_cached: cached performance request hints - * @highest_perf: the maximum performance an individual processor may reach, - * assuming ideal conditions - * For platforms that do not support the preferred core feature, the - * highest_pef may be configured with 166 or 255, to avoid max frequency - * calculated wrongly. we take the fixed value as the highest_perf. - * @nominal_perf: the maximum sustained performance level of the processor, - * assuming ideal operating conditions - * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power - * savings are achieved - * @lowest_perf: the absolute lowest performance level of the processor + * @perf: cached performance-related data * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher * priority. - * @min_limit_perf: Cached value of the performance corresponding to policy->min - * @max_limit_perf: Cached value of the performance corresponding to policy->max * @min_limit_freq: Cached value of policy->min (in khz) * @max_limit_freq: Cached value of policy->max (in khz) * @nominal_freq: the frequency (in khz) that mapped to nominal_perf @@ -68,13 +87,9 @@ struct amd_cpudata { struct freq_qos_request req[2]; u64 cppc_req_cached; - u8 highest_perf; - u8 nominal_perf; - u8 lowest_nonlinear_perf; - u8 lowest_perf; + union perf_cached perf; + u8 prefcore_ranking; - u8 min_limit_perf; - u8 max_limit_perf; u32 min_limit_freq; u32 max_limit_freq; u32 nominal_freq; From 6f0b13f16f7a214996a8a125080a6a99bda5d1f7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 6 Dec 2024 23:20:04 -0600 Subject: [PATCH 15/29] cpufreq/amd-pstate: Overhaul locking amd_pstate_cpu_boost_update() and refresh_frequency_limits() both update the policy state and have nothing to do with the amd-pstate driver itself. A global "limits" lock doesn't make sense because each CPU can have policies changed independently. Each time a CPU changes values they will atomically be written to the per-CPU perf member. Drop per CPU locking cases. The remaining "global" driver lock is used to ensure that only one entity can change driver modes at a given time. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 983c8728701e..65714d510ce3 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -196,7 +196,6 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) return -EINVAL; } -static DEFINE_MUTEX(amd_pstate_limits_lock); static DEFINE_MUTEX(amd_pstate_driver_lock); static u8 msr_get_epp(struct amd_cpudata *cpudata) @@ -752,7 +751,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) pr_err("Boost mode is not supported by this processor or SBIOS\n"); return -EOPNOTSUPP; } - guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_cpu_boost_update(policy, state); refresh_frequency_limits(policy); @@ -1170,8 +1168,6 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - guard(mutex)(&amd_pstate_limits_lock); - ret = amd_pstate_set_energy_pref_index(policy, ret); return ret ? ret : count; @@ -1344,8 +1340,10 @@ int amd_pstate_update_status(const char *buf, size_t size) if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX) return -EINVAL; - if (mode_state_machine[cppc_state][mode_idx]) + if (mode_state_machine[cppc_state][mode_idx]) { + guard(mutex)(&amd_pstate_driver_lock); return mode_state_machine[cppc_state][mode_idx](mode_idx); + } return 0; } @@ -1366,7 +1364,6 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, char *p = memchr(buf, '\n', count); int ret; - guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_update_status(buf, p ? p - buf : count); return ret < 0 ? ret : count; @@ -1638,8 +1635,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) if (cpudata->suspended) return 0; - guard(mutex)(&amd_pstate_limits_lock); - if (trace_amd_pstate_epp_perf_enabled()) { trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, AMD_CPPC_EPP_BALANCE_POWERSAVE, @@ -1679,8 +1674,6 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { - guard(mutex)(&amd_pstate_limits_lock); - /* enable amd pstate from suspend state*/ amd_pstate_epp_reenable(policy); From f458cf79d73b144d900e0c274b9eb8a2f3641a0e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 7 Dec 2024 00:28:41 -0600 Subject: [PATCH 16/29] cpufreq/amd-pstate: Drop `cppc_cap1_cached` The `cppc_cap1_cached` variable isn't used at all, there is no need to read it at initialization for each CPU. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 5 ----- drivers/cpufreq/amd-pstate.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 65714d510ce3..6f1a3056bcbd 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1508,11 +1508,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) if (ret) return ret; WRITE_ONCE(cpudata->cppc_req_cached, value); - - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); - if (ret) - return ret; - WRITE_ONCE(cpudata->cppc_cap1_cached, value); } ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); if (ret) diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 83532a0079a8..1557e1afea79 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -76,7 +76,6 @@ struct amd_aperf_mperf { * AMD P-State driver supports preferred core featue. * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value - * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value * * The amd_cpudata is key private data for each CPU thread in AMD P-State, and * represents all the attributes and goals that AMD P-State requests at runtime. @@ -105,7 +104,6 @@ struct amd_cpudata { /* EPP feature related attributes*/ u8 epp_cached; u32 policy; - u64 cppc_cap1_cached; bool suspended; u8 epp_default; }; From 93984d3cea8ab5f2631ec5ba491ce4e47218ab7b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 7 Dec 2024 23:42:48 -0600 Subject: [PATCH 17/29] cpufreq/amd-pstate-ut: Use _free macro to free put policy Using a scoped cleanup macro simplifies cleanup code. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 5f6a92a816e6..e02672e67380 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -127,11 +128,12 @@ static void amd_pstate_ut_check_perf(u32 index) u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; u64 cap1 = 0; struct cppc_perf_caps cppc_perf; - struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata = NULL; union perf_cached cur_perf; for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; + policy = cpufreq_cpu_get(cpu); if (!policy) break; @@ -142,7 +144,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (ret) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); - goto skip_test; + return; } highest_perf = cppc_perf.highest_perf; @@ -154,7 +156,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (ret) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); - goto skip_test; + return; } highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); @@ -167,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) { pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", __func__, cpu, highest_perf, cur_perf.highest_perf); - goto skip_test; + return; } if (nominal_perf != cur_perf.nominal_perf || (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) || @@ -177,7 +179,7 @@ static void amd_pstate_ut_check_perf(u32 index) __func__, cpu, nominal_perf, cur_perf.nominal_perf, lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf, lowest_perf, cur_perf.lowest_perf); - goto skip_test; + return; } if (!((highest_perf >= nominal_perf) && @@ -188,15 +190,11 @@ static void amd_pstate_ut_check_perf(u32 index) pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", __func__, cpu, highest_perf, nominal_perf, lowest_nonlinear_perf, lowest_perf); - goto skip_test; + return; } - cpufreq_cpu_put(policy); } amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - return; -skip_test: - cpufreq_cpu_put(policy); } /* @@ -207,10 +205,11 @@ static void amd_pstate_ut_check_perf(u32 index) static void amd_pstate_ut_check_freq(u32 index) { int cpu = 0; - struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata = NULL; for_each_possible_cpu(cpu) { + struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; + policy = cpufreq_cpu_get(cpu); if (!policy) break; @@ -224,14 +223,14 @@ static void amd_pstate_ut_check_freq(u32 index) pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq, cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq); - goto skip_test; + return; } if (cpudata->lowest_nonlinear_freq != policy->min) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n", __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min); - goto skip_test; + return; } if (cpudata->boost_supported) { @@ -243,20 +242,16 @@ static void amd_pstate_ut_check_freq(u32 index) pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", __func__, cpu, policy->max, policy->cpuinfo.max_freq, cpudata->nominal_freq); - goto skip_test; + return; } } else { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d must support boost!\n", __func__, cpu); - goto skip_test; + return; } - cpufreq_cpu_put(policy); } amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - return; -skip_test: - cpufreq_cpu_put(policy); } static int amd_pstate_set_mode(enum amd_pstate_mode mode) From 66030cc1c533e26bb4dbe8a356090f24dd734801 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 14 Feb 2025 12:59:40 -0600 Subject: [PATCH 18/29] cpufreq/amd-pstate-ut: Allow lowest nonlinear and lowest to be the same Several Ryzen AI processors support the exact same value for lowest nonlinear perf and lowest perf. Loosen up the unit tests to allow this scenario. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index e02672e67380..b3693a4c28d2 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -184,7 +184,7 @@ static void amd_pstate_ut_check_perf(u32 index) if (!((highest_perf >= nominal_perf) && (nominal_perf > lowest_nonlinear_perf) && - (lowest_nonlinear_perf > lowest_perf) && + (lowest_nonlinear_perf >= lowest_perf) && (lowest_perf > 0))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", @@ -217,7 +217,7 @@ static void amd_pstate_ut_check_freq(u32 index) if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) && (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && - (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) && + (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) && (policy->cpuinfo.min_freq > 0))) { amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", From a7875346c6891063b83cd59dc3bd23113e4debf5 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 14 Feb 2025 16:31:25 -0600 Subject: [PATCH 19/29] cpufreq/amd-pstate-ut: Drop SUCCESS and FAIL enums Enums are effectively used as a boolean and don't show the return value of the failing call. Instead of using enums switch to returning the actual return code from the unit test. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 149 +++++++++++++------------------- 1 file changed, 58 insertions(+), 91 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index b3693a4c28d2..cd9a472e8dc3 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -32,30 +32,20 @@ #include "amd-pstate.h" -/* - * Abbreviations: - * amd_pstate_ut: used as a shortform for AMD P-State unit test. - * It helps to keep variable names smaller, simpler - */ -enum amd_pstate_ut_result { - AMD_PSTATE_UT_RESULT_PASS, - AMD_PSTATE_UT_RESULT_FAIL, -}; struct amd_pstate_ut_struct { const char *name; - void (*func)(u32 index); - enum amd_pstate_ut_result result; + int (*func)(u32 index); }; /* * Kernel module for testing the AMD P-State unit test */ -static void amd_pstate_ut_acpi_cpc_valid(u32 index); -static void amd_pstate_ut_check_enabled(u32 index); -static void amd_pstate_ut_check_perf(u32 index); -static void amd_pstate_ut_check_freq(u32 index); -static void amd_pstate_ut_check_driver(u32 index); +static int amd_pstate_ut_acpi_cpc_valid(u32 index); +static int amd_pstate_ut_check_enabled(u32 index); +static int amd_pstate_ut_check_perf(u32 index); +static int amd_pstate_ut_check_freq(u32 index); +static int amd_pstate_ut_check_driver(u32 index); static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = { {"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid }, @@ -78,51 +68,46 @@ static bool get_shared_mem(void) /* * check the _CPC object is present in SBIOS. */ -static void amd_pstate_ut_acpi_cpc_valid(u32 index) +static int amd_pstate_ut_acpi_cpc_valid(u32 index) { - if (acpi_cpc_valid()) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + if (!acpi_cpc_valid()) { pr_err("%s the _CPC object is not present in SBIOS!\n", __func__); + return -EINVAL; } -} -static void amd_pstate_ut_pstate_enable(u32 index) -{ - int ret = 0; - u64 cppc_enable = 0; - - ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable); - if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret); - return; - } - if (cppc_enable) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; - pr_err("%s amd pstate must be enabled!\n", __func__); - } + return 0; } /* * check if amd pstate is enabled */ -static void amd_pstate_ut_check_enabled(u32 index) +static int amd_pstate_ut_check_enabled(u32 index) { + u64 cppc_enable = 0; + int ret; + if (get_shared_mem()) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else - amd_pstate_ut_pstate_enable(index); + return 0; + + ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable); + if (ret) { + pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret); + return ret; + } + + if (!cppc_enable) { + pr_err("%s amd pstate must be enabled!\n", __func__); + return -EINVAL; + } + + return 0; } /* * check if performance values are reasonable. * highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0 */ -static void amd_pstate_ut_check_perf(u32 index) +static int amd_pstate_ut_check_perf(u32 index) { int cpu = 0, ret = 0; u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; @@ -142,9 +127,8 @@ static void amd_pstate_ut_check_perf(u32 index) if (get_shared_mem()) { ret = cppc_get_perf_caps(cpu, &cppc_perf); if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret); - return; + return ret; } highest_perf = cppc_perf.highest_perf; @@ -154,9 +138,8 @@ static void amd_pstate_ut_check_perf(u32 index) } else { ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); if (ret) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret); - return; + return ret; } highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); @@ -169,32 +152,30 @@ static void amd_pstate_ut_check_perf(u32 index) if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) { pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n", __func__, cpu, highest_perf, cur_perf.highest_perf); - return; + return -EINVAL; } if (nominal_perf != cur_perf.nominal_perf || (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) || (lowest_perf != cur_perf.lowest_perf)) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n", __func__, cpu, nominal_perf, cur_perf.nominal_perf, lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf, lowest_perf, cur_perf.lowest_perf); - return; + return -EINVAL; } if (!((highest_perf >= nominal_perf) && (nominal_perf > lowest_nonlinear_perf) && (lowest_nonlinear_perf >= lowest_perf) && (lowest_perf > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n", __func__, cpu, highest_perf, nominal_perf, lowest_nonlinear_perf, lowest_perf); - return; + return -EINVAL; } } - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + return 0; } /* @@ -202,7 +183,7 @@ static void amd_pstate_ut_check_perf(u32 index) * max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0 * check max freq when set support boost mode. */ -static void amd_pstate_ut_check_freq(u32 index) +static int amd_pstate_ut_check_freq(u32 index) { int cpu = 0; struct amd_cpudata *cpudata = NULL; @@ -219,39 +200,33 @@ static void amd_pstate_ut_check_freq(u32 index) (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) && (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) && (policy->cpuinfo.min_freq > 0))) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n", __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq, cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq); - return; + return -EINVAL; } if (cpudata->lowest_nonlinear_freq != policy->min) { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n", __func__, cpu, cpudata->lowest_nonlinear_freq, policy->min); - return; + return -EINVAL; } if (cpudata->boost_supported) { - if ((policy->max == policy->cpuinfo.max_freq) || - (policy->max == cpudata->nominal_freq)) - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; - else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; + if ((policy->max != policy->cpuinfo.max_freq) && + (policy->max != cpudata->nominal_freq)) { pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n", __func__, cpu, policy->max, policy->cpuinfo.max_freq, cpudata->nominal_freq); - return; + return -EINVAL; } } else { - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL; pr_err("%s cpu%d must support boost!\n", __func__, cpu); - return; + return -EINVAL; } } - amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS; + return 0; } static int amd_pstate_set_mode(enum amd_pstate_mode mode) @@ -263,32 +238,28 @@ static int amd_pstate_set_mode(enum amd_pstate_mode mode) return amd_pstate_update_status(mode_str, strlen(mode_str)); } -static void amd_pstate_ut_check_driver(u32 index) +static int amd_pstate_ut_check_driver(u32 index) { enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE; - int ret; for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) { - ret = amd_pstate_set_mode(mode1); + int ret = amd_pstate_set_mode(mode1); if (ret) - goto out; + return ret; for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) { if (mode1 == mode2) continue; ret = amd_pstate_set_mode(mode2); - if (ret) - goto out; + if (ret) { + pr_err("%s: failed to update status for %s->%s\n", __func__, + amd_pstate_get_mode_string(mode1), + amd_pstate_get_mode_string(mode2)); + return ret; + } } } -out: - if (ret) - pr_warn("%s: failed to update status for %s->%s: %d\n", __func__, - amd_pstate_get_mode_string(mode1), - amd_pstate_get_mode_string(mode2), ret); - amd_pstate_ut_cases[index].result = ret ? - AMD_PSTATE_UT_RESULT_FAIL : - AMD_PSTATE_UT_RESULT_PASS; + return 0; } static int __init amd_pstate_ut_init(void) @@ -296,16 +267,12 @@ static int __init amd_pstate_ut_init(void) u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases); for (i = 0; i < arr_size; i++) { - amd_pstate_ut_cases[i].func(i); - switch (amd_pstate_ut_cases[i].result) { - case AMD_PSTATE_UT_RESULT_PASS: + int ret = amd_pstate_ut_cases[i].func(i); + + if (ret) + pr_err("%-4d %-20s\t fail: %d!\n", i+1, amd_pstate_ut_cases[i].name, ret); + else pr_info("%-4d %-20s\t success!\n", i+1, amd_pstate_ut_cases[i].name); - break; - case AMD_PSTATE_UT_RESULT_FAIL: - default: - pr_info("%-4d %-20s\t fail!\n", i+1, amd_pstate_ut_cases[i].name); - break; - } } return 0; From 2aac38ac06cb751bd3e672a4fb1eb3073f1866ab Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 14 Feb 2025 16:33:12 -0600 Subject: [PATCH 20/29] cpufreq/amd-pstate-ut: Run on all of the correct CPUs If a CPU is missing a policy or one has been offlined then the unit test is skipped for the rest of the CPUs on the system. Instead; iterate online CPUs and skip any missing policies to allow continuing to test the rest of them. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index cd9a472e8dc3..2ab3017d7a0b 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -116,12 +116,12 @@ static int amd_pstate_ut_check_perf(u32 index) struct amd_cpudata *cpudata = NULL; union perf_cached cur_perf; - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; policy = cpufreq_cpu_get(cpu); if (!policy) - break; + continue; cpudata = policy->driver_data; if (get_shared_mem()) { @@ -188,12 +188,12 @@ static int amd_pstate_ut_check_freq(u32 index) int cpu = 0; struct amd_cpudata *cpudata = NULL; - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; policy = cpufreq_cpu_get(cpu); if (!policy) - break; + continue; cpudata = policy->driver_data; if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) && From c630458c7a4b990742905e312af20a7c3260ca14 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 14 Feb 2025 16:34:23 -0600 Subject: [PATCH 21/29] cpufreq/amd-pstate-ut: Adjust variable scope In amd_pstate_ut_check_freq() and amd_pstate_ut_check_perf() the cpudata variable is only needed in the scope of the for loop. Move it there. Reviewed-by: Gautham R. Shenoy Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-ut.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 2ab3017d7a0b..edc1475989e3 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -113,11 +113,11 @@ static int amd_pstate_ut_check_perf(u32 index) u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0; u64 cap1 = 0; struct cppc_perf_caps cppc_perf; - struct amd_cpudata *cpudata = NULL; union perf_cached cur_perf; for_each_online_cpu(cpu) { struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; + struct amd_cpudata *cpudata; policy = cpufreq_cpu_get(cpu); if (!policy) @@ -186,10 +186,10 @@ static int amd_pstate_ut_check_perf(u32 index) static int amd_pstate_ut_check_freq(u32 index) { int cpu = 0; - struct amd_cpudata *cpudata = NULL; for_each_online_cpu(cpu) { struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; + struct amd_cpudata *cpudata; policy = cpufreq_cpu_get(cpu); if (!policy) From b4cc466b973590114b4bf49025d362ad9618a23e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 7 Dec 2024 22:38:06 -0600 Subject: [PATCH 22/29] cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks Bitfield masks are easier to follow and less error prone. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- arch/x86/include/asm/msr-index.h | 18 ++++++++++-------- arch/x86/kernel/acpi/cppc.c | 4 +++- drivers/cpufreq/amd-pstate-ut.c | 9 +++++---- drivers/cpufreq/amd-pstate.c | 16 ++++++---------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 72765b2fe0d8..fc2634cc48fd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -701,15 +701,17 @@ #define MSR_AMD_CPPC_REQ 0xc00102b3 #define MSR_AMD_CPPC_STATUS 0xc00102b4 -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* AMD Performance Counter Global Status and Control MSRs */ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index d745dd586303..77bfb846490c 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -4,6 +4,8 @@ * Copyright (c) 2016, Intel Corporation. */ +#include + #include #include #include @@ -149,7 +151,7 @@ int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) if (ret) goto out; - val = AMD_CPPC_HIGHEST_PERF(val); + val = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, val); } else { ret = cppc_get_highest_perf(cpu, &val); if (ret) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index edc1475989e3..e671bc7d1550 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -142,10 +143,10 @@ static int amd_pstate_ut_check_perf(u32 index) return ret; } - highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); - nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); - lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); - lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); + highest_perf = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1); + nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); + lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); + lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); } cur_perf = READ_ONCE(cpudata->perf); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 6f1a3056bcbd..5c439b14caae 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -89,11 +89,6 @@ static bool cppc_enabled; static bool amd_pstate_prefcore = true; static struct quirk_entry *quirks; -#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) -#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) -#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) -#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) - /* * AMD Energy Preference Performance (EPP) * The EPP is used in the CCLK DPM controller to drive @@ -439,12 +434,13 @@ static int msr_init_perf(struct amd_cpudata *cpudata) perf.highest_perf = numerator; perf.max_limit_perf = numerator; - perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1); - perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1); - perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1); - perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1); + perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); + perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1); + perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1); + perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1); WRITE_ONCE(cpudata->perf, perf); - WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1)); + return 0; } From 9f5daa2f2f6dddd2a847d077bfddc7dc0d9dab10 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 11:54:32 -0600 Subject: [PATCH 23/29] cpufreq/amd-pstate: Cache CPPC request in shared mem case too In order to prevent a potential write for shmem_update_perf() cache the request into the cppc_req_cached variable normally only used for the MSR case. This adds symmetry into the code and potentially avoids extra writes. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 5c439b14caae..06bf0d888be6 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -496,6 +496,8 @@ static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { struct cppc_perf_ctrls perf_ctrls; + u64 value, prev; + int ret; if (cppc_state == AMD_PSTATE_ACTIVE) { int ret = shmem_set_epp(cpudata, epp); @@ -504,11 +506,29 @@ static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, return ret; } + value = prev = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | + AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + + if (value == prev) + return 0; + perf_ctrls.max_perf = max_perf; perf_ctrls.min_perf = min_perf; perf_ctrls.desired_perf = des_perf; - return cppc_set_perf(cpudata->cpu, &perf_ctrls); + ret = cppc_set_perf(cpudata->cpu, &perf_ctrls); + if (ret) + return ret; + + WRITE_ONCE(cpudata->cppc_req_cached, value); + + return 0; } static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) From 77fbea69b0ffad0e46c7018d07e04b6628482e14 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 11:57:38 -0600 Subject: [PATCH 24/29] cpufreq/amd-pstate: Move all EPP tracing into *_update_perf and *_set_epp functions The EPP tracing is done by the caller today, but this precludes the information about whether the CPPC request has changed. Move it into the update_perf and set_epp functions and include information about whether the request has changed from the last one. amd_pstate_update_perf() and amd_pstate_set_epp() now require the policy as an argument instead of the cpudata. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate-trace.h | 13 +++- drivers/cpufreq/amd-pstate.c | 118 +++++++++++++++++------------ 2 files changed, 80 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h index f457d4af2c62..32e1bdc588c5 100644 --- a/drivers/cpufreq/amd-pstate-trace.h +++ b/drivers/cpufreq/amd-pstate-trace.h @@ -90,7 +90,8 @@ TRACE_EVENT(amd_pstate_epp_perf, u8 epp, u8 min_perf, u8 max_perf, - bool boost + bool boost, + bool changed ), TP_ARGS(cpu_id, @@ -98,7 +99,8 @@ TRACE_EVENT(amd_pstate_epp_perf, epp, min_perf, max_perf, - boost), + boost, + changed), TP_STRUCT__entry( __field(unsigned int, cpu_id) @@ -107,6 +109,7 @@ TRACE_EVENT(amd_pstate_epp_perf, __field(u8, min_perf) __field(u8, max_perf) __field(bool, boost) + __field(bool, changed) ), TP_fast_assign( @@ -116,15 +119,17 @@ TRACE_EVENT(amd_pstate_epp_perf, __entry->min_perf = min_perf; __entry->max_perf = max_perf; __entry->boost = boost; + __entry->changed = changed; ), - TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u", + TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u, changed=%u", (unsigned int)__entry->cpu_id, (u8)__entry->min_perf, (u8)__entry->max_perf, (u8)__entry->highest_perf, (u8)__entry->epp, - (bool)__entry->boost + (bool)__entry->boost, + (bool)__entry->changed ) ); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 06bf0d888be6..e5db731618e8 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -228,9 +228,10 @@ static u8 shmem_get_epp(struct amd_cpudata *cpudata) return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp); } -static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, +static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { + struct amd_cpudata *cpudata = policy->driver_data; u64 value, prev; value = prev = READ_ONCE(cpudata->cppc_req_cached); @@ -242,6 +243,18 @@ static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + max_perf, + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; @@ -256,24 +269,26 @@ static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf, } WRITE_ONCE(cpudata->cppc_req_cached, value); - WRITE_ONCE(cpudata->epp_cached, epp); + if (epp != cpudata->epp_cached) + WRITE_ONCE(cpudata->epp_cached, epp); return 0; } DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf); -static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata, +static inline int amd_pstate_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { - return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf, + return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf, max_perf, epp, fast_switch); } -static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) +static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) { + struct amd_cpudata *cpudata = policy->driver_data; u64 value, prev; int ret; @@ -281,6 +296,19 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) value &= ~AMD_CPPC_EPP_PERF_MASK; value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; @@ -299,15 +327,29 @@ static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp) DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp); -static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp) +static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp) { - return static_call(amd_pstate_set_epp)(cpudata, epp); + return static_call(amd_pstate_set_epp)(policy, epp); } -static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp) +static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) { - int ret; + struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + int ret; + + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = cpudata->perf; + + trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, + epp, + FIELD_GET(AMD_CPPC_MIN_PERF_MASK, + cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, + cpudata->cppc_req_cached), + policy->boost_enabled, + epp != cpudata->epp_cached); + } if (epp == cpudata->epp_cached) return 0; @@ -339,17 +381,7 @@ static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, return -EBUSY; } - if (trace_amd_pstate_epp_perf_enabled()) { - union perf_cached perf = READ_ONCE(cpudata->perf); - - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - epp, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), - policy->boost_enabled); - } - - return amd_pstate_set_epp(cpudata, epp); + return amd_pstate_set_epp(policy, epp); } static inline int msr_cppc_enable(bool enable) @@ -492,15 +524,16 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata) return static_call(amd_pstate_init_perf)(cpudata); } -static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, +static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf, u8 des_perf, u8 max_perf, u8 epp, bool fast_switch) { + struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; u64 value, prev; int ret; if (cppc_state == AMD_PSTATE_ACTIVE) { - int ret = shmem_set_epp(cpudata, epp); + int ret = shmem_set_epp(policy, epp); if (ret) return ret; @@ -515,6 +548,18 @@ static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf, value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + if (trace_amd_pstate_epp_perf_enabled()) { + union perf_cached perf = READ_ONCE(cpudata->perf); + + trace_amd_pstate_epp_perf(cpudata->cpu, + perf.highest_perf, + epp, + min_perf, + max_perf, + policy->boost_enabled, + value != prev); + } + if (value == prev) return 0; @@ -592,7 +637,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf, cpudata->cpu, fast_switch); } - amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch); + amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch); } static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) @@ -1525,7 +1570,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) return ret; WRITE_ONCE(cpudata->cppc_req_cached, value); } - ret = amd_pstate_set_epp(cpudata, cpudata->epp_default); + ret = amd_pstate_set_epp(policy, cpudata->epp_default); if (ret) return ret; @@ -1566,14 +1611,8 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) epp = READ_ONCE(cpudata->epp_cached); perf = READ_ONCE(cpudata->perf); - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp, - perf.min_limit_perf, - perf.max_limit_perf, - policy->boost_enabled); - } - return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U, + return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U, perf.max_limit_perf, epp, false); } @@ -1605,20 +1644,12 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - union perf_cached perf = READ_ONCE(cpudata->perf); int ret; ret = amd_pstate_cppc_enable(true); if (ret) pr_err("failed to enable amd pstate during resume, return %d\n", ret); - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - cpudata->epp_cached, - FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached), - perf.highest_perf, policy->boost_enabled); - } return amd_pstate_epp_update_limit(policy); } @@ -1646,14 +1677,7 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) if (cpudata->suspended) return 0; - if (trace_amd_pstate_epp_perf_enabled()) { - trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, - perf.lowest_perf, perf.lowest_perf, - policy->boost_enabled); - } - - return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf, + return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf, AMD_CPPC_EPP_BALANCE_POWERSAVE, false); } From 1905fac6f9e08e34135e089704a0733ce711eb83 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 12:02:14 -0600 Subject: [PATCH 25/29] cpufreq/amd-pstate: Update cppc_req_cached for shared mem EPP writes On EPP only writes update the cached variable so that the min/max performance controls don't need to be updated again. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index e5db731618e8..df42a2d22225 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -336,6 +336,7 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) { struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + u64 value; int ret; if (trace_amd_pstate_epp_perf_enabled()) { @@ -362,6 +363,11 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) } WRITE_ONCE(cpudata->epp_cached, epp); + value = READ_ONCE(cpudata->cppc_req_cached); + value &= ~AMD_CPPC_EPP_PERF_MASK; + value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp); + WRITE_ONCE(cpudata->cppc_req_cached, value); + return ret; } From 93039a60fb28f72196769869aa4b502f1849a373 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Dec 2024 14:44:15 -0600 Subject: [PATCH 26/29] cpufreq/amd-pstate: Drop debug statements for policy setting There are trace events that exist now for all amd-pstate modes that will output information right before programming to the hardware. This makes the existing debug statements unnecessary remaining overhead. Drop them. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index df42a2d22225..c70ed41a2448 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -667,7 +667,6 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) } cpufreq_verify_within_cpu_limits(policy_data); - pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min); return 0; } @@ -1630,9 +1629,6 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", - policy->cpuinfo.max_freq, policy->max); - cpudata->policy = policy->policy; ret = amd_pstate_epp_update_limit(policy); From 2064543f5ba0d2929e3e9b3a616c3262a57c7925 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 16 Dec 2024 10:41:23 -0600 Subject: [PATCH 27/29] cpufreq/amd-pstate: Rework CPPC enabling The CPPC enable register is configured as "write once". That is any future writes don't actually do anything. Because of this, all the cleanup paths that currently exist for CPPC disable are non-effective. Rework CPPC enable to only enable after all the CAP registers have been read to avoid enabling CPPC on CPUs with invalid _CPC or unpopulated MSRs. As the register is write once, remove all cleanup paths as well. Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 179 +++++++---------------------------- 1 file changed, 35 insertions(+), 144 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index c70ed41a2448..7802f37bde02 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -85,7 +85,6 @@ static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; -static bool cppc_enabled; static bool amd_pstate_prefcore = true; static struct quirk_entry *quirks; @@ -371,89 +370,21 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) return ret; } -static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy, - int pref_index) +static inline int msr_cppc_enable(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - u8 epp; - - if (!pref_index) - epp = cpudata->epp_default; - else - epp = epp_values[pref_index]; - - if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("EPP cannot be set under performance policy\n"); - return -EBUSY; - } - - return amd_pstate_set_epp(policy, epp); + return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1); } -static inline int msr_cppc_enable(bool enable) +static int shmem_cppc_enable(struct cpufreq_policy *policy) { - int ret, cpu; - unsigned long logical_proc_id_mask = 0; - - /* - * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared. - */ - if (!enable) - return 0; - - if (enable == cppc_enabled) - return 0; - - for_each_present_cpu(cpu) { - unsigned long logical_id = topology_logical_package_id(cpu); - - if (test_bit(logical_id, &logical_proc_id_mask)) - continue; - - set_bit(logical_id, &logical_proc_id_mask); - - ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, - enable); - if (ret) - return ret; - } - - cppc_enabled = enable; - return 0; -} - -static int shmem_cppc_enable(bool enable) -{ - int cpu, ret = 0; - struct cppc_perf_ctrls perf_ctrls; - - if (enable == cppc_enabled) - return 0; - - for_each_present_cpu(cpu) { - ret = cppc_set_enable(cpu, enable); - if (ret) - return ret; - - /* Enable autonomous mode for EPP */ - if (cppc_state == AMD_PSTATE_ACTIVE) { - /* Set desired perf as zero to allow EPP firmware control */ - perf_ctrls.desired_perf = 0; - ret = cppc_set_perf(cpu, &perf_ctrls); - if (ret) - return ret; - } - } - - cppc_enabled = enable; - return ret; + return cppc_set_enable(policy->cpu, 1); } DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable); -static inline int amd_pstate_cppc_enable(bool enable) +static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy) { - return static_call(amd_pstate_cppc_enable)(enable); + return static_call(amd_pstate_cppc_enable)(policy); } static int msr_init_perf(struct amd_cpudata *cpudata) @@ -1063,6 +994,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->nominal_freq, perf.highest_perf); + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; + policy->boost_enabled = READ_ONCE(cpudata->boost_supported); /* It will be updated by governor */ @@ -1110,28 +1045,6 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) kfree(cpudata); } -static int amd_pstate_cpu_resume(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd-pstate during resume, return %d\n", ret); - - return ret; -} - -static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to disable amd-pstate during suspend, return %d\n", ret); - - return ret; -} - /* Sysfs attributes */ /* @@ -1223,8 +1136,10 @@ static ssize_t show_energy_performance_available_preferences( static ssize_t store_energy_performance_preference( struct cpufreq_policy *policy, const char *buf, size_t count) { + struct amd_cpudata *cpudata = policy->driver_data; char str_preference[21]; ssize_t ret; + u8 epp; ret = sscanf(buf, "%20s", str_preference); if (ret != 1) @@ -1234,7 +1149,17 @@ static ssize_t store_energy_performance_preference( if (ret < 0) return -EINVAL; - ret = amd_pstate_set_energy_pref_index(policy, ret); + if (!ret) + epp = cpudata->epp_default; + else + epp = epp_values[ret]; + + if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("EPP cannot be set under performance policy\n"); + return -EBUSY; + } + + ret = amd_pstate_set_epp(policy, epp); return ret ? ret : count; } @@ -1267,7 +1192,6 @@ static ssize_t show_energy_performance_preference( static void amd_pstate_driver_cleanup(void) { - amd_pstate_cppc_enable(false); cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } @@ -1301,14 +1225,6 @@ static int amd_pstate_register_driver(int mode) cppc_state = mode; - ret = amd_pstate_cppc_enable(true); - if (ret) { - pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n", - ret); - amd_pstate_driver_cleanup(); - return ret; - } - /* at least one CPU supports CPB */ current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB); @@ -1548,11 +1464,15 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf); + policy->driver_data = cpudata; + + ret = amd_pstate_cppc_enable(policy); + if (ret) + goto free_cpudata1; /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - policy->driver_data = cpudata; policy->boost_enabled = READ_ONCE(cpudata->boost_supported); @@ -1644,31 +1564,11 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) return 0; } -static int amd_pstate_epp_reenable(struct cpufreq_policy *policy) -{ - int ret; - - ret = amd_pstate_cppc_enable(true); - if (ret) - pr_err("failed to enable amd pstate during resume, return %d\n", ret); - - - return amd_pstate_epp_update_limit(policy); -} - static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - int ret; + pr_debug("AMD CPU Core %d going online\n", policy->cpu); - pr_debug("AMD CPU Core %d going online\n", cpudata->cpu); - - ret = amd_pstate_epp_reenable(policy); - if (ret) - return ret; - cpudata->suspended = false; - - return 0; + return amd_pstate_cppc_enable(policy); } static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) @@ -1686,11 +1586,6 @@ static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - int ret; - - /* avoid suspending when EPP is not enabled */ - if (cppc_state != AMD_PSTATE_ACTIVE) - return 0; /* invalidate to ensure it's rewritten during resume */ cpudata->cppc_req_cached = 0; @@ -1698,11 +1593,6 @@ static int amd_pstate_epp_suspend(struct cpufreq_policy *policy) /* set this flag to avoid setting core offline*/ cpudata->suspended = true; - /* disable CPPC in lowlevel firmware */ - ret = amd_pstate_cppc_enable(false); - if (ret) - pr_err("failed to suspend, return %d\n", ret); - return 0; } @@ -1711,8 +1601,12 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy) struct amd_cpudata *cpudata = policy->driver_data; if (cpudata->suspended) { + int ret; + /* enable amd pstate from suspend state*/ - amd_pstate_epp_reenable(policy); + ret = amd_pstate_epp_update_limit(policy); + if (ret) + return ret; cpudata->suspended = false; } @@ -1727,8 +1621,6 @@ static struct cpufreq_driver amd_pstate_driver = { .fast_switch = amd_pstate_fast_switch, .init = amd_pstate_cpu_init, .exit = amd_pstate_cpu_exit, - .suspend = amd_pstate_cpu_suspend, - .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, .update_limits = amd_pstate_update_limits, .name = "amd-pstate", @@ -1895,7 +1787,6 @@ static int __init amd_pstate_init(void) global_attr_free: cpufreq_unregister_driver(current_pstate_driver); - amd_pstate_cppc_enable(false); return ret; } device_initcall(amd_pstate_init); From 4e16c1175238f2b9d86199b427c5db1a24c9a85f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 19 Jan 2025 07:05:43 -0600 Subject: [PATCH 28/29] cpufreq/amd-pstate: Stop caching EPP EPP values are cached in the cpudata structure per CPU. This is needless though because they are also cached in the CPPC request variable. Drop the separate cache for EPP values and always reference the CPPC request variable when needed. Reviewed-by: Dhananjay Ugwekar Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 19 ++++++++++--------- drivers/cpufreq/amd-pstate.h | 1 - 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 7802f37bde02..8c5f4449adfa 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -268,8 +268,6 @@ static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf, } WRITE_ONCE(cpudata->cppc_req_cached, value); - if (epp != cpudata->epp_cached) - WRITE_ONCE(cpudata->epp_cached, epp); return 0; } @@ -318,7 +316,6 @@ static int msr_set_epp(struct cpufreq_policy *policy, u8 epp) } /* update both so that msr_update_perf() can effectively check */ - WRITE_ONCE(cpudata->epp_cached, epp); WRITE_ONCE(cpudata->cppc_req_cached, value); return ret; @@ -335,9 +332,12 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) { struct amd_cpudata *cpudata = policy->driver_data; struct cppc_perf_ctrls perf_ctrls; + u8 epp_cached; u64 value; int ret; + + epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); if (trace_amd_pstate_epp_perf_enabled()) { union perf_cached perf = cpudata->perf; @@ -348,10 +348,10 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), policy->boost_enabled, - epp != cpudata->epp_cached); + epp != epp_cached); } - if (epp == cpudata->epp_cached) + if (epp == epp_cached) return 0; perf_ctrls.energy_perf = epp; @@ -360,7 +360,6 @@ static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp) pr_debug("failed to set energy perf value (%d)\n", ret); return ret; } - WRITE_ONCE(cpudata->epp_cached, epp); value = READ_ONCE(cpudata->cppc_req_cached); value &= ~AMD_CPPC_EPP_PERF_MASK; @@ -1168,9 +1167,11 @@ static ssize_t show_energy_performance_preference( struct cpufreq_policy *policy, char *buf) { struct amd_cpudata *cpudata = policy->driver_data; - u8 preference; + u8 preference, epp; - switch (cpudata->epp_cached) { + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); + + switch (epp) { case AMD_CPPC_EPP_PERFORMANCE: preference = EPP_INDEX_PERFORMANCE; break; @@ -1533,7 +1534,7 @@ static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) epp = 0; else - epp = READ_ONCE(cpudata->epp_cached); + epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached); perf = READ_ONCE(cpudata->perf); diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index 1557e1afea79..fbe1c08d3f06 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -102,7 +102,6 @@ struct amd_cpudata { bool hw_prefcore; /* EPP feature related attributes*/ - u8 epp_cached; u32 policy; bool suspended; u8 epp_default; From efb758c8c803217e58248f03db372c5e23827dae Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 19 Feb 2025 14:08:14 -0600 Subject: [PATCH 29/29] cpufreq/amd-pstate: Drop actions in amd_pstate_epp_cpu_offline() When the CPU goes offline there is no need to change the CPPC request because the CPU will go into the deepest C-state it supports already. Actually changing the CPPC request when it goes offline messes up the cached values and can lead to the wrong values being restored when it comes back. Instead drop the actions and if the CPU comes back online let amd_pstate_epp_set_policy() restore it to expected values. Reviewed-by: Dhananjay Ugwekar Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 8c5f4449adfa..024d33d5e367 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1574,14 +1574,7 @@ static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy) static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy) { - struct amd_cpudata *cpudata = policy->driver_data; - union perf_cached perf = READ_ONCE(cpudata->perf); - - if (cpudata->suspended) - return 0; - - return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf, - AMD_CPPC_EPP_BALANCE_POWERSAVE, false); + return 0; } static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)