mirror of
https://github.com/torvalds/linux.git
synced 2026-05-26 16:12:59 +02:00
Merge back earlier cpufreq material for 6.15
This commit is contained in:
commit
7983a0b565
|
|
@ -2316,6 +2316,9 @@
|
|||
per_cpu_perf_limits
|
||||
Allow per-logical-CPU P-State performance control limits using
|
||||
cpufreq sysfs interface
|
||||
no_cas
|
||||
Do not enable capacity-aware scheduling (CAS) on
|
||||
hybrid systems
|
||||
|
||||
intremap= [X86-64,Intel-IOMMU,EARLY]
|
||||
on enable Interrupt Remapping (default)
|
||||
|
|
|
|||
|
|
@ -696,6 +696,9 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
|
|||
Use per-logical-CPU P-State limits (see `Coordination of P-state
|
||||
Limits`_ for details).
|
||||
|
||||
``no_cas``
|
||||
Do not enable capacity-aware scheduling (CAS) which is enabled by
|
||||
default on hybrid systems.
|
||||
|
||||
Diagnostics and Tuning
|
||||
======================
|
||||
|
|
|
|||
|
|
@ -145,7 +145,23 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
|
|||
time_elapsed = update_time - j_cdbs->prev_update_time;
|
||||
j_cdbs->prev_update_time = update_time;
|
||||
|
||||
idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
|
||||
/*
|
||||
* cur_idle_time could be smaller than j_cdbs->prev_cpu_idle if
|
||||
* it's obtained from get_cpu_idle_time_jiffy() when NOHZ is
|
||||
* off, where idle_time is calculated by the difference between
|
||||
* time elapsed in jiffies and "busy time" obtained from CPU
|
||||
* statistics. If a CPU is 100% busy, the time elapsed and busy
|
||||
* time should grow with the same amount in two consecutive
|
||||
* samples, but in practice there could be a tiny difference,
|
||||
* making the accumulated idle time decrease sometimes. Hence,
|
||||
* in this case, idle_time should be treated as 0 so that the
|
||||
* load computation below remains correct.
|
||||
*/
|
||||
if (cur_idle_time > j_cdbs->prev_cpu_idle)
|
||||
idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
|
||||
else
|
||||
idle_time = 0;
|
||||
|
||||
j_cdbs->prev_cpu_idle = cur_idle_time;
|
||||
|
||||
if (ignore_nice) {
|
||||
|
|
@ -162,7 +178,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
|
|||
* calls, so the previous load value can be used then.
|
||||
*/
|
||||
load = j_cdbs->prev_load;
|
||||
} else if (unlikely((int)idle_time > 2 * sampling_rate &&
|
||||
} else if (unlikely(idle_time > 2 * sampling_rate &&
|
||||
j_cdbs->prev_load)) {
|
||||
/*
|
||||
* If the CPU had gone completely idle and a task has
|
||||
|
|
@ -189,30 +205,15 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
|
|||
load = j_cdbs->prev_load;
|
||||
j_cdbs->prev_load = 0;
|
||||
} else {
|
||||
if (time_elapsed >= idle_time) {
|
||||
if (time_elapsed > idle_time)
|
||||
load = 100 * (time_elapsed - idle_time) / time_elapsed;
|
||||
} else {
|
||||
/*
|
||||
* That can happen if idle_time is returned by
|
||||
* get_cpu_idle_time_jiffy(). In that case
|
||||
* idle_time is roughly equal to the difference
|
||||
* between time_elapsed and "busy time" obtained
|
||||
* from CPU statistics. Then, the "busy time"
|
||||
* can end up being greater than time_elapsed
|
||||
* (for example, if jiffies_64 and the CPU
|
||||
* statistics are updated by different CPUs),
|
||||
* so idle_time may in fact be negative. That
|
||||
* means, though, that the CPU was busy all
|
||||
* the time (on the rough average) during the
|
||||
* last sampling interval and 100 can be
|
||||
* returned as the load.
|
||||
*/
|
||||
load = (int)idle_time < 0 ? 100 : 0;
|
||||
}
|
||||
else
|
||||
load = 0;
|
||||
|
||||
j_cdbs->prev_load = load;
|
||||
}
|
||||
|
||||
if (unlikely((int)idle_time > 2 * sampling_rate)) {
|
||||
if (unlikely(idle_time > 2 * sampling_rate)) {
|
||||
unsigned int periods = idle_time / sampling_rate;
|
||||
|
||||
if (periods < idle_periods)
|
||||
|
|
|
|||
|
|
@ -936,6 +936,8 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
|
|||
NULL,
|
||||
};
|
||||
|
||||
/*
 * Set to true when the "no_cas" option is passed to intel_pstate_setup().
 * When set, hybrid_init_cpu_capacity_scaling() returns immediately, so
 * capacity-aware scheduling (CAS) is not enabled.
 */
static bool no_cas __ro_after_init;
|
||||
|
||||
/*
 * NOTE(review): per the comment in hybrid_init_cpu_capacity_scaling(), this
 * pointer being set indicates that hybrid CPU capacity scaling has already
 * been enabled -- confirm against the full file.
 */
static struct cpudata *hybrid_max_perf_cpu __read_mostly;
|
||||
/*
|
||||
* Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
|
||||
|
|
@ -1041,6 +1043,10 @@ static void hybrid_refresh_cpu_capacity_scaling(void)
|
|||
|
||||
static void hybrid_init_cpu_capacity_scaling(bool refresh)
|
||||
{
|
||||
/* Bail out if enabling capacity-aware scheduling is prohibited. */
|
||||
if (no_cas)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
|
||||
* scaling has been enabled already and the driver is just changing the
|
||||
|
|
@ -3688,6 +3694,15 @@ static int __init intel_pstate_init(void)
|
|||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* The Intel pstate driver will be ignored if the platform
|
||||
* firmware has its own power management modes.
|
||||
*/
|
||||
if (intel_pstate_platform_pwr_mgmt_exists()) {
|
||||
pr_info("P-states controlled by the platform\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
id = x86_match_cpu(hwp_support_ids);
|
||||
if (id) {
|
||||
hwp_forced = intel_pstate_hwp_is_enabled();
|
||||
|
|
@ -3743,15 +3758,6 @@ static int __init intel_pstate_init(void)
|
|||
default_driver = &intel_cpufreq;
|
||||
|
||||
hwp_cpu_matched:
|
||||
/*
|
||||
* The Intel pstate driver will be ignored if the platform
|
||||
* firmware has its own power management modes.
|
||||
*/
|
||||
if (intel_pstate_platform_pwr_mgmt_exists()) {
|
||||
pr_info("P-states controlled by the platform\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!hwp_active && hwp_only)
|
||||
return -ENOTSUPP;
|
||||
|
||||
|
|
@ -3835,6 +3841,9 @@ static int __init intel_pstate_setup(char *str)
|
|||
if (!strcmp(str, "no_hwp"))
|
||||
no_hwp = 1;
|
||||
|
||||
if (!strcmp(str, "no_cas"))
|
||||
no_cas = true;
|
||||
|
||||
if (!strcmp(str, "force"))
|
||||
force_load = 1;
|
||||
if (!strcmp(str, "hwp_only"))
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user