Power management fixes for 6.15-rc3

- Fix the performance-to-frequency scaling factor computation on
    systems using HWP in the intel_pstate driver after a recent incorrect
    update of it (Rafael Wysocki).
 
  - Fix the usage of the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag
    in the schedutil cpufreq governor after a recent update of it that
    has caused frequency limits changes to be missed sometimes (Rafael
    Wysocki).
 
  - Address some recently discovered synchronization issues related to
    frequency limits changes in the schedutil cpufreq governor and in
    the cpufreq core (Rafael Wysocki).
 
  - Fix ITMT support in the amd-pstate cpufreq driver so that it is
    enabled after asym priorities have been correctly initialized for
    all CPUs (K Prateek Nayak).
 
  - Fix changing min/max limits in the amd-pstate cpufreq driver while
    on the performance governor (Dhananjay Ugwekar).
 
  - Fix a function name in the runtime PM documentation that was
    previously updated incorrectly (Sakari Ailus).
 -----BEGIN PGP SIGNATURE-----
 
 iQFGBAABCAAwFiEEcM8Aw/RY0dgsiRUR7l+9nS/U47UFAmgCpXwSHHJqd0Byand5
 c29ja2kubmV0AAoJEO5fvZ0v1OO1EIkH/13UC2LyeFWJLnj6AI072xPcOkxrEYGt
 dkEqmacPFb+MV4IoWH3XXmLn2Ag5Os7HTeNN6BJFN1apNJSHg4teeGEi/bISZPLi
 YHpBB5BEZpioTht8unZrxtbrPcHd0Q/lQFMqqIS+EK/0XIzSnXSM2c/69U3h8M02
 Osn8BVIlNLLZd3nm/T4A4Tite8X1jZmxukvCuv4CXtWVj4S25UFuWM34AItKZgl3
 QGsaTqB1CLJ69VkZLsYnWRvUvqslirj4hOpw1jcSzP0lnejsde189/WNXEgdtY/e
 cYqc+QxaGHZjj+kIUBM5/ABpgt9FVHY7I6MMEjuKla85LU1cgITghlk=
 =vdxx
 -----END PGP SIGNATURE-----

Merge tag 'pm-6.15-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management fixes from Rafael Wysocki:
 "These are mostly cpufreq fixes, some of which address recent
  regressions and some address older issues that have come to light
  during the last two weeks, and a runtime PM documentation correction:

   - Fix the performance-to-frequency scaling factor computation on
     systems using HWP in the intel_pstate driver after a recent
     incorrect update of it (Rafael Wysocki)

   - Fix the usage of the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag
     in the schedutil cpufreq governor after a recent update of it that
     has caused frequency limits changes to be missed sometimes (Rafael
     Wysocki)

   - Address some recently discovered synchronization issues related to
     frequency limits changes in the schedutil cpufreq governor and in
     the cpufreq core (Rafael Wysocki)

   - Fix ITMT support in the amd-pstate cpufreq driver so that it is
     enabled after asym priorities have been correctly initialized for
     all CPUs (K Prateek Nayak)

   - Fix changing min/max limits in the amd-pstate cpufreq driver while
     on the performance governor (Dhananjay Ugwekar)

   - Fix a function name in the runtime PM documentation that was
     previously updated incorrectly (Sakari Ailus)"

* tag 'pm-6.15-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: Avoid using inconsistent policy->min and policy->max
  cpufreq/sched: Set need_freq_update in ignore_dl_rate_limit()
  cpufreq/sched: Explicitly synchronize limits_changed flag handling
  cpufreq/sched: Fix the usage of CPUFREQ_NEED_UPDATE_LIMITS
  Documentation: PM: runtime: Fix a reference to pm_runtime_autosuspend()
  cpufreq: intel_pstate: Fix hwp_get_cpu_scaling()
  cpufreq/amd-pstate: Enable ITMT support after initializing core rankings
  cpufreq/amd-pstate: Fix min_limit perf and freq updation for performance governor
This commit is contained in:
Linus Torvalds 2025-04-18 13:06:12 -07:00
commit cb64c513b5
5 changed files with 86 additions and 39 deletions

View File

@ -154,7 +154,7 @@ suspending the device are satisfied) and to queue up a suspend request for the
device in that case. If there is no idle callback, or if the callback returns
0, then the PM core will attempt to carry out a runtime suspend of the device,
also respecting devices configured for autosuspend. In essence this means a
call to __pm_runtime_autosuspend() (do note that drivers needs to update the
call to pm_runtime_autosuspend() (do note that drivers needs to update the
device last busy mark, pm_runtime_mark_last_busy(), to control the delay under
this circumstance). To prevent this (for example, if the callback routine has
started a delayed suspend), the routine must return a non-zero value. Negative

View File

@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
union perf_cached perf = READ_ONCE(cpudata->perf);
perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);
WRITE_ONCE(cpudata->max_limit_freq, policy->max);
WRITE_ONCE(cpudata->min_limit_freq, policy->min);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);
WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq));
} else {
perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
WRITE_ONCE(cpudata->min_limit_freq, policy->min);
}
WRITE_ONCE(cpudata->perf, perf);
}
@ -791,16 +794,6 @@ static void amd_perf_ctl_reset(unsigned int cpu)
wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
/*
* Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
* due to locking, so queue the work for later.
*/
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
#define CPPC_MAX_PERF U8_MAX
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
@ -811,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
cpudata->hw_prefcore = true;
/*
* The priorities can be set regardless of whether or not
* sched_set_itmt_support(true) has been called and it is valid to
* update them at any time after it has been called.
*/
/* Priorities must be initialized before ITMT support can be toggled on. */
sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
schedule_work(&sched_prefcore_work);
}
static void amd_pstate_update_limits(unsigned int cpu)
@ -1193,6 +1180,9 @@ static ssize_t show_energy_performance_preference(
static void amd_pstate_driver_cleanup(void)
{
if (amd_pstate_prefcore)
sched_clear_itmt_support();
cppc_state = AMD_PSTATE_DISABLE;
current_pstate_driver = NULL;
}
@ -1235,6 +1225,10 @@ static int amd_pstate_register_driver(int mode)
return ret;
}
/* Enable ITMT support once all CPUs have initialized their asym priorities. */
if (amd_pstate_prefcore)
sched_set_itmt_support();
return 0;
}

View File

@ -540,8 +540,6 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
{
unsigned int idx;
target_freq = clamp_val(target_freq, policy->min, policy->max);
if (!policy->freq_table)
return target_freq;
@ -565,7 +563,22 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
unsigned int target_freq)
{
return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
unsigned int min = READ_ONCE(policy->min);
unsigned int max = READ_ONCE(policy->max);
/*
* If this function runs in parallel with cpufreq_set_policy(), it may
* read policy->min before the update and policy->max after the update
* or the other way around, so there is no ordering guarantee.
*
* Resolve this by always honoring the max (in case it comes from
* thermal throttling or similar).
*/
if (unlikely(min > max))
min = max;
return __resolve_freq(policy, clamp_val(target_freq, min, max),
CPUFREQ_RELATION_LE);
}
EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
@ -2384,6 +2397,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
if (cpufreq_disabled())
return -ENODEV;
target_freq = clamp_val(target_freq, policy->min, policy->max);
target_freq = __resolve_freq(policy, target_freq, relation);
pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
@ -2708,11 +2722,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
* Resolve policy min/max to available frequencies. It ensures
* that frequency resolution will neither overshoot the requested maximum
* nor undershoot the requested minimum.
*
* Avoid storing intermediate values in policy->max or policy->min and
* compiler optimizations around them because they may be accessed
* concurrently by cpufreq_driver_resolve_freq() during the update.
*/
policy->min = new_data.min;
policy->max = new_data.max;
policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L);
policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H));
new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L);
WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min);
trace_cpu_frequency_limits(policy);
cpufreq_update_pressure(policy);

View File

@ -2209,7 +2209,7 @@ static int knl_get_turbo_pstate(int cpu)
static int hwp_get_cpu_scaling(int cpu)
{
if (hybrid_scaling_factor) {
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
struct cpuinfo_x86 *c = &cpu_data(cpu);
u8 cpu_type = c->topo.intel_type;
/*

View File

@ -81,9 +81,23 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
if (!cpufreq_this_cpu_can_update(sg_policy->policy))
return false;
if (unlikely(sg_policy->limits_changed)) {
sg_policy->limits_changed = false;
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
if (unlikely(READ_ONCE(sg_policy->limits_changed))) {
WRITE_ONCE(sg_policy->limits_changed, false);
sg_policy->need_freq_update = true;
/*
* The above limits_changed update must occur before the reads
* of policy limits in cpufreq_driver_resolve_freq() or a policy
* limits update might be missed, so use a memory barrier to
* ensure it.
*
* This pairs with the write memory barrier in sugov_limits().
*/
smp_mb();
return true;
} else if (sg_policy->need_freq_update) {
/* ignore_dl_rate_limit() wants a new frequency to be found. */
return true;
}
@ -95,10 +109,22 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
unsigned int next_freq)
{
if (sg_policy->need_freq_update)
if (sg_policy->need_freq_update) {
sg_policy->need_freq_update = false;
else if (sg_policy->next_freq == next_freq)
/*
* The policy limits have changed, but if the return value of
* cpufreq_driver_resolve_freq() after applying the new limits
* is still equal to the previously selected frequency, the
* driver callback need not be invoked unless the driver
* specifically wants that to happen on every update of the
* policy limits.
*/
if (sg_policy->next_freq == next_freq &&
!cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS))
return false;
} else if (sg_policy->next_freq == next_freq) {
return false;
}
sg_policy->next_freq = next_freq;
sg_policy->last_freq_update_time = time;
@ -365,7 +391,7 @@ static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; }
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
sg_cpu->sg_policy->limits_changed = true;
sg_cpu->sg_policy->need_freq_update = true;
}
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
@ -871,7 +897,16 @@ static void sugov_limits(struct cpufreq_policy *policy)
mutex_unlock(&sg_policy->work_lock);
}
sg_policy->limits_changed = true;
/*
* The limits_changed update below must take place before the updates
* of policy limits in cpufreq_set_policy() or a policy limits update
* might be missed, so use a memory barrier to ensure it.
*
* This pairs with the memory barrier in sugov_should_update_freq().
*/
smp_wmb();
WRITE_ONCE(sg_policy->limits_changed, true);
}
struct cpufreq_governor schedutil_gov = {