mirror of
https://github.com/torvalds/linux.git
synced 2026-05-23 06:31:58 +02:00
Merge back cpufreq material for 6.19
This commit is contained in:
commit
377e38859c
|
|
@ -48,8 +48,9 @@ only way to pass early-configuration-time parameters to it is via the kernel
|
|||
command line. However, its configuration can be adjusted via ``sysfs`` to a
|
||||
great extent. In some configurations it even is possible to unregister it via
|
||||
``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and
|
||||
registered (see `below <status_attr_>`_).
|
||||
registered (see :ref:`below <status_attr>`).
|
||||
|
||||
.. _operation_modes:
|
||||
|
||||
Operation Modes
|
||||
===============
|
||||
|
|
@ -62,6 +63,8 @@ a certain performance scaling algorithm. Which of them will be in effect
|
|||
depends on what kernel command line options are used and on the capabilities of
|
||||
the processor.
|
||||
|
||||
.. _active_mode:
|
||||
|
||||
Active Mode
|
||||
-----------
|
||||
|
||||
|
|
@ -94,6 +97,8 @@ Which of the P-state selection algorithms is used by default depends on the
|
|||
Namely, if that option is set, the ``performance`` algorithm will be used by
|
||||
default, and the other one will be used by default if it is not set.
|
||||
|
||||
.. _active_mode_hwp:
|
||||
|
||||
Active Mode With HWP
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
|
@ -123,7 +128,7 @@ Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
|
|||
internal P-state selection logic is expected to focus entirely on performance.
|
||||
|
||||
This will override the EPP/EPB setting coming from the ``sysfs`` interface
|
||||
(see `Energy vs Performance Hints`_ below). Moreover, any attempts to change
|
||||
(see :ref:`energy_performance_hints` below). Moreover, any attempts to change
|
||||
the EPP/EPB to a value different from 0 ("performance") via ``sysfs`` in this
|
||||
configuration will be rejected.
|
||||
|
||||
|
|
@ -192,6 +197,8 @@ This is the default P-state selection algorithm if the
|
|||
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
|
||||
is not set.
|
||||
|
||||
.. _passive_mode:
|
||||
|
||||
Passive Mode
|
||||
------------
|
||||
|
||||
|
|
@ -289,12 +296,12 @@ Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes
|
|||
the entire range of available P-states, including the whole turbo range, to the
|
||||
``CPUFreq`` core and (in the passive mode) to generic scaling governors. This
|
||||
generally causes turbo P-states to be set more often when ``intel_pstate`` is
|
||||
used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_
|
||||
for more information).
|
||||
used relative to ACPI-based CPU performance scaling (see
|
||||
:ref:`below <acpi-cpufreq>` for more information).
|
||||
|
||||
Moreover, since ``intel_pstate`` always knows what the real turbo threshold is
|
||||
(even if the Configurable TDP feature is enabled in the processor), its
|
||||
``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should
|
||||
``no_turbo`` attribute in ``sysfs`` (described :ref:`below <no_turbo_attr>`) should
|
||||
work as expected in all cases (that is, if set to disable turbo P-states, it
|
||||
always should prevent ``intel_pstate`` from using them).
|
||||
|
||||
|
|
@ -307,12 +314,12 @@ pieces of information on it to be known, including:
|
|||
|
||||
* The minimum supported P-state.
|
||||
|
||||
* The maximum supported `non-turbo P-state <turbo_>`_.
|
||||
* The maximum supported :ref:`non-turbo P-state <turbo>`.
|
||||
|
||||
* Whether or not turbo P-states are supported at all.
|
||||
|
||||
* The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states
|
||||
are supported).
|
||||
* The maximum supported :ref:`one-core turbo P-state <turbo>` (if turbo
|
||||
P-states are supported).
|
||||
|
||||
* The scaling formula to translate the driver's internal representation
|
||||
of P-states into frequencies and the other way around.
|
||||
|
|
@ -400,10 +407,10 @@ Energy-Aware Scheduling Support
|
|||
|
||||
If ``CONFIG_ENERGY_MODEL`` has been set during kernel configuration and
|
||||
``intel_pstate`` runs on a hybrid processor without SMT, in addition to enabling
|
||||
`CAS <CAS_>`_ it registers an Energy Model for the processor. This allows the
|
||||
:ref:`CAS` it registers an Energy Model for the processor. This allows the
|
||||
Energy-Aware Scheduling (EAS) support to be enabled in the CPU scheduler if
|
||||
``schedutil`` is used as the ``CPUFreq`` governor which requires ``intel_pstate``
|
||||
to operate in the `passive mode <Passive Mode_>`_.
|
||||
to operate in the :ref:`passive mode <passive_mode>`.
|
||||
|
||||
The Energy Model registered by ``intel_pstate`` is artificial (that is, it is
|
||||
based on abstract cost values and it does not include any real power numbers)
|
||||
|
|
@ -432,6 +439,8 @@ the ``energy_model`` directory in ``debugfs`` (typically mounted on
|
|||
User Space Interface in ``sysfs``
|
||||
=================================
|
||||
|
||||
.. _global_attributes:
|
||||
|
||||
Global Attributes
|
||||
-----------------
|
||||
|
||||
|
|
@ -444,8 +453,8 @@ argument is passed to the kernel in the command line.
|
|||
|
||||
``max_perf_pct``
|
||||
Maximum P-state the driver is allowed to set in percent of the
|
||||
maximum supported performance level (the highest supported `turbo
|
||||
P-state <turbo_>`_).
|
||||
maximum supported performance level (the highest supported :ref:`turbo
|
||||
P-state <turbo>`).
|
||||
|
||||
This attribute will not be exposed if the
|
||||
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
|
||||
|
|
@ -453,8 +462,8 @@ argument is passed to the kernel in the command line.
|
|||
|
||||
``min_perf_pct``
|
||||
Minimum P-state the driver is allowed to set in percent of the
|
||||
maximum supported performance level (the highest supported `turbo
|
||||
P-state <turbo_>`_).
|
||||
maximum supported performance level (the highest supported :ref:`turbo
|
||||
P-state <turbo>`).
|
||||
|
||||
This attribute will not be exposed if the
|
||||
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
|
||||
|
|
@ -463,18 +472,18 @@ argument is passed to the kernel in the command line.
|
|||
``num_pstates``
|
||||
Number of P-states supported by the processor (between 0 and 255
|
||||
inclusive) including both turbo and non-turbo P-states (see
|
||||
`Turbo P-states Support`_).
|
||||
:ref:`turbo`).
|
||||
|
||||
This attribute is present only if the value exposed by it is the same
|
||||
for all of the CPUs in the system.
|
||||
|
||||
The value of this attribute is not affected by the ``no_turbo``
|
||||
setting described `below <no_turbo_attr_>`_.
|
||||
setting described :ref:`below <no_turbo_attr>`.
|
||||
|
||||
This attribute is read-only.
|
||||
|
||||
``turbo_pct``
|
||||
Ratio of the `turbo range <turbo_>`_ size to the size of the entire
|
||||
Ratio of the :ref:`turbo range <turbo>` size to the size of the entire
|
||||
range of supported P-states, in percent.
|
||||
|
||||
This attribute is present only if the value exposed by it is the same
|
||||
|
|
@ -486,7 +495,7 @@ argument is passed to the kernel in the command line.
|
|||
|
||||
``no_turbo``
|
||||
If set (equal to 1), the driver is not allowed to set any turbo P-states
|
||||
(see `Turbo P-states Support`_). If unset (equal to 0, which is the
|
||||
(see :ref:`turbo`). If unset (equal to 0, which is the
|
||||
default), turbo P-states can be set by the driver.
|
||||
[Note that ``intel_pstate`` does not support the general ``boost``
|
||||
attribute (supported by some other scaling drivers) which is replaced
|
||||
|
|
@ -495,11 +504,11 @@ argument is passed to the kernel in the command line.
|
|||
This attribute does not affect the maximum supported frequency value
|
||||
supplied to the ``CPUFreq`` core and exposed via the policy interface,
|
||||
but it affects the maximum possible value of per-policy P-state limits
|
||||
(see `Interpretation of Policy Attributes`_ below for details).
|
||||
(see :ref:`policy_attributes_interpretation` below for details).
|
||||
|
||||
``hwp_dynamic_boost``
|
||||
This attribute is only present if ``intel_pstate`` works in the
|
||||
`active mode with the HWP feature enabled <Active Mode With HWP_>`_ in
|
||||
:ref:`active mode with the HWP feature enabled <active_mode_hwp>` in
|
||||
the processor. If set (equal to 1), it causes the minimum P-state limit
|
||||
to be increased dynamically for a short time whenever a task previously
|
||||
waiting on I/O is selected to run on a given logical CPU (the purpose
|
||||
|
|
@ -514,12 +523,12 @@ argument is passed to the kernel in the command line.
|
|||
Operation mode of the driver: "active", "passive" or "off".
|
||||
|
||||
"active"
|
||||
The driver is functional and in the `active mode
|
||||
<Active Mode_>`_.
|
||||
The driver is functional and in the :ref:`active mode
|
||||
<active_mode>`.
|
||||
|
||||
"passive"
|
||||
The driver is functional and in the `passive mode
|
||||
<Passive Mode_>`_.
|
||||
The driver is functional and in the :ref:`passive mode
|
||||
<passive_mode>`.
|
||||
|
||||
"off"
|
||||
The driver is not functional (it is not registered as a scaling
|
||||
|
|
@ -547,13 +556,15 @@ argument is passed to the kernel in the command line.
|
|||
attribute to "1" enables the energy-efficiency optimizations and setting
|
||||
to "0" disables them.
|
||||
|
||||
.. _policy_attributes_interpretation:
|
||||
|
||||
Interpretation of Policy Attributes
|
||||
-----------------------------------
|
||||
|
||||
The interpretation of some ``CPUFreq`` policy attributes described in
|
||||
Documentation/admin-guide/pm/cpufreq.rst is special with ``intel_pstate``
|
||||
as the current scaling driver and it generally depends on the driver's
|
||||
`operation mode <Operation Modes_>`_.
|
||||
:ref:`operation mode <operation_modes>`.
|
||||
|
||||
First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
|
||||
``scaling_cur_freq`` attributes are produced by applying a processor-specific
|
||||
|
|
@ -562,9 +573,10 @@ Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq``
|
|||
attributes are capped by the frequency corresponding to the maximum P-state that
|
||||
the driver is allowed to set.
|
||||
|
||||
If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is
|
||||
not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq``
|
||||
and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency.
|
||||
If the ``no_turbo`` :ref:`global attribute <no_turbo_attr>` is set, the driver
|
||||
is not allowed to use turbo P-states, so the maximum value of
|
||||
``scaling_max_freq`` and ``scaling_min_freq`` is limited to the maximum
|
||||
non-turbo P-state frequency.
|
||||
Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and
|
||||
``scaling_min_freq`` to go down to that value if they were above it before.
|
||||
However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be
|
||||
|
|
@ -576,7 +588,7 @@ and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state,
|
|||
which also is the value of ``cpuinfo_max_freq`` in either case.
|
||||
|
||||
Next, the following policy attributes have special meaning if
|
||||
``intel_pstate`` works in the `active mode <Active Mode_>`_:
|
||||
``intel_pstate`` works in the :ref:`active mode <active_mode>`:
|
||||
|
||||
``scaling_available_governors``
|
||||
List of P-state selection algorithms provided by ``intel_pstate``.
|
||||
|
|
@ -597,20 +609,22 @@ processor:
|
|||
Shows the base frequency of the CPU. Any frequency above this will be
|
||||
in the turbo frequency range.
|
||||
|
||||
The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
|
||||
The meaning of these attributes in the :ref:`passive mode <passive_mode>` is the
|
||||
same as for other scaling drivers.
|
||||
|
||||
Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate``
|
||||
depends on the operation mode of the driver. Namely, it is either
|
||||
"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the
|
||||
`passive mode <Passive Mode_>`_).
|
||||
"intel_pstate" (in the :ref:`active mode <active_mode>`) or "intel_cpufreq"
|
||||
(in the :ref:`passive mode <passive_mode>`).
|
||||
|
||||
.. _pstate_limits_coordination:
|
||||
|
||||
Coordination of P-State Limits
|
||||
------------------------------
|
||||
|
||||
``intel_pstate`` allows P-state limits to be set in two ways: with the help of
|
||||
the ``max_perf_pct`` and ``min_perf_pct`` `global attributes
|
||||
<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq``
|
||||
the ``max_perf_pct`` and ``min_perf_pct`` :ref:`global attributes
|
||||
<global_attributes>` or via the ``scaling_max_freq`` and ``scaling_min_freq``
|
||||
``CPUFreq`` policy attributes. The coordination between those limits is based
|
||||
on the following rules, regardless of the current operation mode of the driver:
|
||||
|
||||
|
|
@ -632,17 +646,18 @@ on the following rules, regardless of the current operation mode of the driver:
|
|||
|
||||
3. The global and per-policy limits can be set independently.
|
||||
|
||||
In the `active mode with the HWP feature enabled <Active Mode With HWP_>`_, the
|
||||
In the :ref:`active mode with the HWP feature enabled <active_mode_hwp>`, the
|
||||
resulting effective values are written into hardware registers whenever the
|
||||
limits change in order to request its internal P-state selection logic to always
|
||||
set P-states within these limits. Otherwise, the limits are taken into account
|
||||
by scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
|
||||
every time before setting a new P-state for a CPU.
|
||||
by scaling governors (in the :ref:`passive mode <passive_mode>`) and by the
|
||||
driver every time before setting a new P-state for a CPU.
|
||||
|
||||
Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
|
||||
is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed
|
||||
at all and the only way to set the limits is by using the policy attributes.
|
||||
|
||||
.. _energy_performance_hints:
|
||||
|
||||
Energy vs Performance Hints
|
||||
---------------------------
|
||||
|
|
@ -702,9 +717,9 @@ output.
|
|||
On those systems each ``_PSS`` object returns a list of P-states supported by
|
||||
the corresponding CPU which basically is a subset of the P-states range that can
|
||||
be used by ``intel_pstate`` on the same system, with one exception: the whole
|
||||
`turbo range <turbo_>`_ is represented by one item in it (the topmost one). By
|
||||
convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz
|
||||
than the frequency of the highest non-turbo P-state listed by it, but the
|
||||
:ref:`turbo range <turbo>` is represented by one item in it (the topmost one).
|
||||
By convention, the frequency returned by ``_PSS`` for that item is greater by
|
||||
1 MHz than the frequency of the highest non-turbo P-state listed by it, but the
|
||||
corresponding P-state representation (following the hardware specification)
|
||||
returned for it matches the maximum supported turbo P-state (or is the
|
||||
special value 255 meaning essentially "go as high as you can get").
|
||||
|
|
@ -730,18 +745,18 @@ benefit from running at turbo frequencies will be given non-turbo P-states
|
|||
instead.
|
||||
|
||||
One more issue related to that may appear on systems supporting the
|
||||
`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the
|
||||
turbo threshold. Namely, if that is not coordinated with the lists of P-states
|
||||
returned by ``_PSS`` properly, there may be more than one item corresponding to
|
||||
a turbo P-state in those lists and there may be a problem with avoiding the
|
||||
turbo range (if desirable or necessary). Usually, to avoid using turbo
|
||||
P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed
|
||||
by ``_PSS``, but that is not sufficient when there are other turbo P-states in
|
||||
the list returned by it.
|
||||
:ref:`Configurable TDP feature <turbo>` allowing the platform firmware to set
|
||||
the turbo threshold. Namely, if that is not coordinated with the lists of
|
||||
P-states returned by ``_PSS`` properly, there may be more than one item
|
||||
corresponding to a turbo P-state in those lists and there may be a problem with
|
||||
avoiding the turbo range (if desirable or necessary). Usually, to avoid using
|
||||
turbo P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state
|
||||
listed by ``_PSS``, but that is not sufficient when there are other turbo
|
||||
P-states in the list returned by it.
|
||||
|
||||
Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the
|
||||
`passive mode <Passive Mode_>`_, except that the number of P-states it can set
|
||||
is limited to the ones listed by the ACPI ``_PSS`` objects.
|
||||
:ref:`passive mode <passive_mode>`, except that the number of P-states it can
|
||||
set is limited to the ones listed by the ACPI ``_PSS`` objects.
|
||||
|
||||
|
||||
Kernel Command Line Options for ``intel_pstate``
|
||||
|
|
@ -756,11 +771,11 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
|
|||
processor is supported by it.
|
||||
|
||||
``active``
|
||||
Register ``intel_pstate`` in the `active mode <Active Mode_>`_ to start
|
||||
with.
|
||||
Register ``intel_pstate`` in the :ref:`active mode <active_mode>` to
|
||||
start with.
|
||||
|
||||
``passive``
|
||||
Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
|
||||
Register ``intel_pstate`` in the :ref:`passive mode <passive_mode>` to
|
||||
start with.
|
||||
|
||||
``force``
|
||||
|
|
@ -793,12 +808,12 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
|
|||
and this option has no effect.
|
||||
|
||||
``per_cpu_perf_limits``
|
||||
Use per-logical-CPU P-State limits (see `Coordination of P-state
|
||||
Limits`_ for details).
|
||||
Use per-logical-CPU P-State limits (see
|
||||
:ref:`pstate_limits_coordination` for details).
|
||||
|
||||
``no_cas``
|
||||
Do not enable `capacity-aware scheduling <CAS_>`_ which is enabled by
|
||||
default on hybrid systems without SMT.
|
||||
Do not enable :ref:`capacity-aware scheduling <CAS>` which is enabled
|
||||
by default on hybrid systems without SMT.
|
||||
|
||||
Diagnostics and Tuning
|
||||
======================
|
||||
|
|
@ -810,7 +825,7 @@ There are two static trace events that can be used for ``intel_pstate``
|
|||
diagnostics. One of them is the ``cpu_frequency`` trace event generally used
|
||||
by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific
|
||||
to ``intel_pstate``. Both of them are triggered by ``intel_pstate`` only if
|
||||
it works in the `active mode <Active Mode_>`_.
|
||||
it works in the :ref:`active mode <active_mode>`.
|
||||
|
||||
The following sequence of shell commands can be used to enable them and see
|
||||
their output (if the kernel is generally configured to support event tracing)::
|
||||
|
|
@ -822,7 +837,7 @@ their output (if the kernel is generally configured to support event tracing)::
|
|||
gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476
|
||||
cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2
|
||||
|
||||
If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the
|
||||
If ``intel_pstate`` works in the :ref:`passive mode <passive_mode>`, the
|
||||
``cpu_frequency`` trace event will be triggered either by the ``schedutil``
|
||||
scaling governor (for the policies it is attached to), or by the ``CPUFreq``
|
||||
core (for the policies with other scaling governors).
|
||||
|
|
|
|||
|
|
@ -1421,9 +1421,12 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
|
|||
* If there is a problem with its frequency table, take it
|
||||
* offline and drop it.
|
||||
*/
|
||||
ret = cpufreq_table_validate_and_sort(policy);
|
||||
if (ret)
|
||||
goto out_offline_policy;
|
||||
if (policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_ASCENDING &&
|
||||
policy->freq_table_sorted != CPUFREQ_TABLE_SORTED_DESCENDING) {
|
||||
ret = cpufreq_table_validate_and_sort(policy);
|
||||
if (ret)
|
||||
goto out_offline_policy;
|
||||
}
|
||||
|
||||
/* related_cpus should at least include policy->cpus. */
|
||||
cpumask_copy(policy->related_cpus, policy->cpus);
|
||||
|
|
@ -2550,7 +2553,7 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor)
|
|||
for_each_inactive_policy(policy) {
|
||||
if (!strcmp(policy->last_governor, governor->name)) {
|
||||
policy->governor = NULL;
|
||||
strcpy(policy->last_governor, "\0");
|
||||
policy->last_governor[0] = '\0';
|
||||
}
|
||||
}
|
||||
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
|
||||
|
|
|
|||
|
|
@ -575,13 +575,18 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
|
|||
int scaling = cpu->pstate.scaling;
|
||||
int freq;
|
||||
|
||||
pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
|
||||
pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
|
||||
pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
|
||||
pr_debug("CPU%d: PERF_CTL max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
|
||||
pr_debug("CPU%d: PERF_CTL turbo = %d\n", cpu->cpu, perf_ctl_turbo);
|
||||
pr_debug("CPU%d: PERF_CTL scaling = %d\n", cpu->cpu, perf_ctl_scaling);
|
||||
pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
|
||||
pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
|
||||
pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
|
||||
|
||||
if (scaling == perf_ctl_scaling)
|
||||
return;
|
||||
|
||||
hwp_is_hybrid = true;
|
||||
|
||||
cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_pstate * scaling,
|
||||
perf_ctl_scaling);
|
||||
cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
|
||||
|
|
@ -909,6 +914,11 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
|
|||
[HWP_CPUFREQ_ATTR_COUNT] = NULL,
|
||||
};
|
||||
|
||||
static u8 hybrid_get_cpu_type(unsigned int cpu)
|
||||
{
|
||||
return cpu_data(cpu).topo.intel_type;
|
||||
}
|
||||
|
||||
static bool no_cas __ro_after_init;
|
||||
|
||||
static struct cpudata *hybrid_max_perf_cpu __read_mostly;
|
||||
|
|
@ -925,11 +935,8 @@ static int hybrid_active_power(struct device *dev, unsigned long *power,
|
|||
unsigned long *freq)
|
||||
{
|
||||
/*
|
||||
* Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
|
||||
* of the maximum capacity such that two CPUs of the same type will be
|
||||
* regarded as equally attractive if the utilization of each of them
|
||||
* falls into the same bin, which should prevent tasks from being
|
||||
* migrated between them too often.
|
||||
* Create four "states" corresponding to 40%, 60%, 80%, and 100% of the
|
||||
* full capacity.
|
||||
*
|
||||
* For this purpose, return the "frequency" of 2 for the first
|
||||
* performance level and otherwise leave the value set by the caller.
|
||||
|
|
@ -943,38 +950,40 @@ static int hybrid_active_power(struct device *dev, unsigned long *power,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool hybrid_has_l3(unsigned int cpu)
|
||||
{
|
||||
struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(cpu);
|
||||
unsigned int i;
|
||||
|
||||
if (!cacheinfo)
|
||||
return false;
|
||||
|
||||
for (i = 0; i < cacheinfo->num_leaves; i++) {
|
||||
if (cacheinfo->info_list[i].level == 3)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int hybrid_get_cost(struct device *dev, unsigned long freq,
|
||||
unsigned long *cost)
|
||||
{
|
||||
struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
|
||||
struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id);
|
||||
|
||||
/* Facilitate load balancing between CPUs of the same type. */
|
||||
*cost = freq;
|
||||
/*
|
||||
* The smaller the perf-to-frequency scaling factor, the larger the IPC
|
||||
* ratio between the given CPU and the least capable CPU in the system.
|
||||
* Regard that IPC ratio as the primary cost component and assume that
|
||||
* the scaling factors for different CPU types will differ by at least
|
||||
* 5% and they will not be above INTEL_PSTATE_CORE_SCALING.
|
||||
* Adjust the cost depending on CPU type.
|
||||
*
|
||||
* Add the freq value to the cost, so that the cost of running on CPUs
|
||||
* of the same type in different "utilization bins" is different.
|
||||
* The idea is to start loading up LPE-cores before E-cores and start
|
||||
* to populate E-cores when LPE-cores are utilized above 60% of the
|
||||
* capacity. Similarly, P-cores start to be populated when E-cores are
|
||||
* utilized above 60% of the capacity.
|
||||
*/
|
||||
*cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
|
||||
/*
|
||||
* Increase the cost slightly for CPUs able to access L3 to avoid
|
||||
* touching it in case some other CPUs of the same type can do the work
|
||||
* without it.
|
||||
*/
|
||||
if (cacheinfo) {
|
||||
unsigned int i;
|
||||
|
||||
/* Check if L3 cache is there. */
|
||||
for (i = 0; i < cacheinfo->num_leaves; i++) {
|
||||
if (cacheinfo->info_list[i].level == 3) {
|
||||
*cost += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hybrid_get_cpu_type(dev->id) == INTEL_CPU_TYPE_ATOM) {
|
||||
if (hybrid_has_l3(dev->id)) /* E-core */
|
||||
*cost += 1;
|
||||
} else { /* P-core */
|
||||
*cost += 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -1037,9 +1046,9 @@ static void hybrid_set_cpu_capacity(struct cpudata *cpu)
|
|||
|
||||
topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu));
|
||||
|
||||
pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
|
||||
cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
|
||||
cpu->pstate.max_pstate_physical);
|
||||
pr_debug("CPU%d: capacity perf = %u, base perf = %u, sys max perf = %u\n",
|
||||
cpu->cpu, cpu->capacity_perf, cpu->pstate.max_pstate_physical,
|
||||
hybrid_max_perf_cpu->capacity_perf);
|
||||
}
|
||||
|
||||
static void hybrid_clear_cpu_capacity(unsigned int cpunum)
|
||||
|
|
@ -2297,18 +2306,14 @@ static int knl_get_turbo_pstate(int cpu)
|
|||
static int hwp_get_cpu_scaling(int cpu)
|
||||
{
|
||||
if (hybrid_scaling_factor) {
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
u8 cpu_type = c->topo.intel_type;
|
||||
|
||||
/*
|
||||
* Return the hybrid scaling factor for P-cores and use the
|
||||
* default core scaling for E-cores.
|
||||
*/
|
||||
if (cpu_type == INTEL_CPU_TYPE_CORE)
|
||||
if (hybrid_get_cpu_type(cpu) == INTEL_CPU_TYPE_CORE)
|
||||
return hybrid_scaling_factor;
|
||||
|
||||
if (cpu_type == INTEL_CPU_TYPE_ATOM)
|
||||
return core_get_scaling();
|
||||
return core_get_scaling();
|
||||
}
|
||||
|
||||
/* Use core scaling on non-hybrid systems. */
|
||||
|
|
@ -2343,11 +2348,10 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
|
|||
|
||||
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
|
||||
{
|
||||
int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu);
|
||||
int perf_ctl_scaling = pstate_funcs.get_scaling();
|
||||
|
||||
cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(cpu->cpu);
|
||||
cpu->pstate.min_pstate = pstate_funcs.get_min(cpu->cpu);
|
||||
cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
|
||||
cpu->pstate.perf_ctl_scaling = perf_ctl_scaling;
|
||||
|
||||
if (hwp_active && !hwp_mode_bdw) {
|
||||
|
|
@ -2355,10 +2359,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
|
|||
|
||||
if (pstate_funcs.get_cpu_scaling) {
|
||||
cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
|
||||
if (cpu->pstate.scaling != perf_ctl_scaling) {
|
||||
intel_pstate_hybrid_hwp_adjust(cpu);
|
||||
hwp_is_hybrid = true;
|
||||
}
|
||||
intel_pstate_hybrid_hwp_adjust(cpu);
|
||||
} else {
|
||||
cpu->pstate.scaling = perf_ctl_scaling;
|
||||
}
|
||||
|
|
@ -2760,6 +2761,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
|
|||
X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
|
||||
X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
|
||||
X86_MATCH(INTEL_ATOM_DARKMONT_X, core_funcs),
|
||||
X86_MATCH(INTEL_DIAMONDRAPIDS_X, core_funcs),
|
||||
{}
|
||||
};
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user