Merge back earlier cpufreq material for 6.15

This commit is contained in:
Rafael J. Wysocki 2025-03-06 21:30:26 +01:00
commit 7983a0b565
4 changed files with 47 additions and 31 deletions

View File

@ -2316,6 +2316,9 @@
per_cpu_perf_limits
Allow per-logical-CPU P-State performance control limits using
cpufreq sysfs interface
no_cas
Do not enable capacity-aware scheduling (CAS) on
hybrid systems
intremap= [X86-64,Intel-IOMMU,EARLY]
on enable Interrupt Remapping (default)

View File

@ -696,6 +696,9 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
Use per-logical-CPU P-State limits (see `Coordination of P-state
Limits`_ for details).
``no_cas``
Do not enable capacity-aware scheduling (CAS), which is enabled by
default on hybrid systems.
Diagnostics and Tuning
======================

View File

@ -145,7 +145,23 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
time_elapsed = update_time - j_cdbs->prev_update_time;
j_cdbs->prev_update_time = update_time;
idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
/*
* cur_idle_time can be smaller than j_cdbs->prev_cpu_idle if
* it is obtained from get_cpu_idle_time_jiffy() when NOHZ is
* off, in which case idle time is computed as the difference
* between the time elapsed in jiffies and the "busy time"
* obtained from CPU statistics. If a CPU is 100% busy, the
* elapsed time and the busy time should grow by the same amount
* between two consecutive samples, but in practice there can be
* a tiny difference, so the accumulated idle time occasionally
* decreases. In that case, treat idle_time as 0 so that the
* subsequent load computation stays correct.
*/
if (cur_idle_time > j_cdbs->prev_cpu_idle)
idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
else
idle_time = 0;
j_cdbs->prev_cpu_idle = cur_idle_time;
if (ignore_nice) {
@ -162,7 +178,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
* calls, so the previous load value can be used then.
*/
load = j_cdbs->prev_load;
} else if (unlikely((int)idle_time > 2 * sampling_rate &&
} else if (unlikely(idle_time > 2 * sampling_rate &&
j_cdbs->prev_load)) {
/*
* If the CPU had gone completely idle and a task has
@ -189,30 +205,15 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
load = j_cdbs->prev_load;
j_cdbs->prev_load = 0;
} else {
if (time_elapsed >= idle_time) {
if (time_elapsed > idle_time)
load = 100 * (time_elapsed - idle_time) / time_elapsed;
} else {
/*
* That can happen if idle_time is returned by
* get_cpu_idle_time_jiffy(). In that case
* idle_time is roughly equal to the difference
* between time_elapsed and "busy time" obtained
* from CPU statistics. Then, the "busy time"
* can end up being greater than time_elapsed
* (for example, if jiffies_64 and the CPU
* statistics are updated by different CPUs),
* so idle_time may in fact be negative. That
* means, though, that the CPU was busy all
* the time (on the rough average) during the
* last sampling interval and 100 can be
* returned as the load.
*/
load = (int)idle_time < 0 ? 100 : 0;
}
else
load = 0;
j_cdbs->prev_load = load;
}
if (unlikely((int)idle_time > 2 * sampling_rate)) {
if (unlikely(idle_time > 2 * sampling_rate)) {
unsigned int periods = idle_time / sampling_rate;
if (periods < idle_periods)

View File

@ -936,6 +936,8 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
NULL,
};
static bool no_cas __ro_after_init;
static struct cpudata *hybrid_max_perf_cpu __read_mostly;
/*
* Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
@ -1041,6 +1043,10 @@ static void hybrid_refresh_cpu_capacity_scaling(void)
static void hybrid_init_cpu_capacity_scaling(bool refresh)
{
/* Bail out if enabling capacity-aware scheduling is prohibited. */
if (no_cas)
return;
/*
* If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
* scaling has been enabled already and the driver is just changing the
@ -3688,6 +3694,15 @@ static int __init intel_pstate_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
/*
* The Intel pstate driver will be ignored if the platform
* firmware has its own power management modes.
*/
if (intel_pstate_platform_pwr_mgmt_exists()) {
pr_info("P-states controlled by the platform\n");
return -ENODEV;
}
id = x86_match_cpu(hwp_support_ids);
if (id) {
hwp_forced = intel_pstate_hwp_is_enabled();
@ -3743,15 +3758,6 @@ static int __init intel_pstate_init(void)
default_driver = &intel_cpufreq;
hwp_cpu_matched:
/*
* The Intel pstate driver will be ignored if the platform
* firmware has its own power management modes.
*/
if (intel_pstate_platform_pwr_mgmt_exists()) {
pr_info("P-states controlled by the platform\n");
return -ENODEV;
}
if (!hwp_active && hwp_only)
return -ENOTSUPP;
@ -3835,6 +3841,9 @@ static int __init intel_pstate_setup(char *str)
if (!strcmp(str, "no_hwp"))
no_hwp = 1;
if (!strcmp(str, "no_cas"))
no_cas = true;
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))