From 7911b6a868ea1fc7e4e4929edaee312a2f061905 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 21 Feb 2023 16:05:42 +0100 Subject: [PATCH 01/29] dt-bindings: cpufreq: qcom-hw: add a compatible for sa8775p Add the compatible for the cpufreq engine present on sa8775p platforms. Signed-off-by: Bartosz Golaszewski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index e4aa8c67d532..92693c33edf9 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -26,6 +26,7 @@ properties: items: - enum: - qcom,qdu1000-cpufreq-epss + - qcom,sa8775p-cpufreq-epss - qcom,sc7280-cpufreq-epss - qcom,sc8280xp-cpufreq-epss - qcom,sm6375-cpufreq-epss From f35512777e31085f1ade7febed6c2038adb1fb07 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 22 Feb 2023 12:14:40 +0000 Subject: [PATCH 02/29] kbuild, cpufreq: tegra124: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: Thierry Reding Cc: Jonathan Hunter Cc: linux-pm@vger.kernel.org Cc: linux-tegra@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra124-cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c index 7a1ea6fdcab6..788672c0c834 100644 --- a/drivers/cpufreq/tegra124-cpufreq.c +++ b/drivers/cpufreq/tegra124-cpufreq.c @@ -221,4 +221,3 @@ module_init(tegra_cpufreq_init); MODULE_AUTHOR("Tuomas Tynkkynen "); MODULE_DESCRIPTION("cpufreq driver for NVIDIA Tegra124"); -MODULE_LICENSE("GPL v2"); From df97441673f303d06dd33c04d8d5ee54ae6b2a82 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 22 Feb 2023 12:14:41 +0000 Subject: [PATCH 03/29] kbuild, cpufreq: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Cc: linux-pm@vger.kernel.org Signed-off-by: Viresh Kumar --- drivers/cpufreq/freq_table.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 67e56cf638ef..90bfc27ed1ba 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -367,4 +367,3 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("CPUfreq frequency table helpers"); -MODULE_LICENSE("GPL"); From 35527c677cb6bbaa6489b8560c8b4316fdd60792 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 16 Feb 2023 11:51:40 +0100 Subject: [PATCH 04/29] cpufreq: qcom-hw: Simplify counting frequency domains For quite some time, this driver has been performing some quite low-level DT operations. Simplify that using platform_get_resource. Signed-off-by: Konrad Dybcio Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 2f581d2d617d..575a4461c25a 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -29,6 +29,8 @@ #define GT_IRQ_STATUS BIT(2) +#define MAX_FREQ_DOMAINS 3 + struct qcom_cpufreq_soc_data { u32 reg_enable; u32 reg_domain_state; @@ -651,10 +653,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { struct clk_hw_onecell_data *clk_data; struct device *dev = &pdev->dev; - struct device_node *soc_node; struct device *cpu_dev; struct clk *clk; - int ret, i, num_domains, reg_sz; + int ret, i, num_domains; clk = clk_get(dev, "xo"); if (IS_ERR(clk)) @@ -681,24 +682,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) if (ret) return ret; - /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ - soc_node = of_get_parent(dev->of_node); - if (!soc_node) - return -EINVAL; - - ret = of_property_read_u32(soc_node, "#address-cells", ®_sz); - if (ret) - goto of_exit; - - ret = of_property_read_u32(soc_node, "#size-cells", &i); - if (ret) - goto of_exit; - - reg_sz += i; - - num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * reg_sz); - if (num_domains <= 0) - return num_domains; + for (num_domains = 0; num_domains < MAX_FREQ_DOMAINS; num_domains++) + if (!platform_get_resource(pdev, IORESOURCE_MEM, num_domains)) + break; qcom_cpufreq.data = devm_kzalloc(dev, sizeof(struct qcom_cpufreq_data) * num_domains, GFP_KERNEL); @@ -762,9 +748,6 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) else dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n"); -of_exit: - of_node_put(soc_node); - return ret; } From b8f3a396a7ee43e6079176cc0fb8de2b95a23681 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:02 -0600 Subject: [PATCH 05/29] cpufreq: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 2 +- drivers/cpufreq/imx-cpufreq-dt.c | 2 +- drivers/cpufreq/imx6q-cpufreq.c | 4 ++-- drivers/cpufreq/scmi-cpufreq.c | 2 +- drivers/cpufreq/tegra20-cpufreq.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index e85703651098..5ac6b9e5270e 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -179,7 +179,7 @@ static bool __init cpu0_node_has_opp_v2_prop(void) struct device_node *np = of_cpu_device_node_get(0); bool ret = false; - if (of_get_property(np, "operating-points-v2", NULL)) + if (of_property_present(np, "operating-points-v2")) ret = true; of_node_put(np); diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index 76e553af2071..535867a7dfdd 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -89,7 +89,7 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) cpu_dev = get_cpu_device(0); - if (!of_find_property(cpu_dev->of_node, "cpu-supply", NULL)) + if (!of_property_present(cpu_dev->of_node, "cpu-supply")) return -ENODEV; if (of_machine_is_compatible("fsl,imx7ulp")) { diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index ad4ce8493144..48e1772e98fd 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -222,7 +222,7 @@ static int imx6q_opp_check_speed_grading(struct device *dev) u32 val; int ret; - if (of_find_property(dev->of_node, "nvmem-cells", NULL)) { + if (of_property_present(dev->of_node, "nvmem-cells")) { ret = nvmem_cell_read_u32(dev, "speed_grade", &val); if (ret) return ret; @@ -279,7 +279,7 @@ static int imx6ul_opp_check_speed_grading(struct device *dev) u32 val; int ret = 0; - if (of_find_property(dev->of_node, "nvmem-cells", NULL)) { + if (of_property_present(dev->of_node, "nvmem-cells")) { ret = nvmem_cell_read_u32(dev, "speed_grade", &val); if (ret) return ret; diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 513a071845c2..f34e6382a4c5 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -310,7 +310,7 @@ static int scmi_cpufreq_probe(struct scmi_device *sdev) #ifdef CONFIG_COMMON_CLK /* dummy clock provider as needed by OPP if clocks property is used */ - if (of_find_property(dev->of_node, "#clock-cells", NULL)) + if (of_property_present(dev->of_node, "#clock-cells")) devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); #endif diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index ab7ac7df9e62..dfd2de4f8e07 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -25,7 +25,7 @@ static bool cpu0_node_has_opp_v2_prop(void) struct device_node *np = of_cpu_device_node_get(0); bool ret = false; - if (of_get_property(np, "operating-points-v2", NULL)) + if (of_property_present(np, "operating-points-v2")) ret = true; of_node_put(np); From 1f5e62f5fb217f2c1e003236be7d03cf606c26c4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 2 Mar 2023 20:14:11 -0800 Subject: [PATCH 06/29] cpufreq: intel_pstate: Enable HWP IO boost for all servers The HWP IO boost results in slight improvements for IO performance on both Ice Lake and Sapphire Rapid servers. Currently there is a CPU model check for Skylake desktop and server along with the ACPI PM profile for performance and enterprise servers to enable IO boost. Remove the CPU model check, so that all current server models enable HWP IO boost by default. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 48a4613cef1e..2a1a6779d82d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2384,12 +2384,6 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { {} }; -static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { - X86_MATCH(SKYLAKE_X, core_funcs), - X86_MATCH(SKYLAKE, core_funcs), - {} -}; - static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -2408,12 +2402,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu->epp_default = -EINVAL; if (hwp_active) { - const struct x86_cpu_id *id; - intel_pstate_hwp_enable(cpu); - id = x86_match_cpu(intel_pstate_hwp_boost_ids); - if (id && intel_pstate_acpi_pm_profile_server()) + if (intel_pstate_acpi_pm_profile_server()) hwp_boost = true; } } else if (hwp_active) { From 76531df5e13b5b997a17940d6980a168c616e962 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:35 +0000 Subject: [PATCH 07/29] ACPI: CPPC: Add min and max perf register writing support Currently writing of min and max perf register is deferred in cppc_set_perf function. In CPPC guided mode, these registers needed to be written to guide the platform about min and max perf levels. Add this support to make guided mode work properly on AMD shared memory systems. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny [ rjw: Fixed up a multiline comment, subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index c51d3ccb4cca..523bad4e3a0c 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1488,7 +1488,7 @@ EXPORT_SYMBOL_GPL(cppc_set_enable); int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); - struct cpc_register_resource *desired_reg; + struct cpc_register_resource *desired_reg, *min_perf_reg, *max_perf_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0; @@ -1499,6 +1499,8 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) } desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; + min_perf_reg = &cpc_desc->cpc_regs[MIN_PERF]; + max_perf_reg = &cpc_desc->cpc_regs[MAX_PERF]; /* * This is Phase-I where we want to write to CPC registers @@ -1507,7 +1509,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) * Since read_lock can be acquired by multiple CPUs simultaneously we * achieve that goal here */ - if (CPC_IN_PCC(desired_reg)) { + if (CPC_IN_PCC(desired_reg) || CPC_IN_PCC(min_perf_reg) || CPC_IN_PCC(max_perf_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id\n"); return -ENODEV; @@ -1530,13 +1532,19 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) cpc_desc->write_cmd_status = 0; } - /* - * Skip writing MIN/MAX until Linux knows how to come up with - * useful values. - */ cpc_write(cpu, desired_reg, perf_ctrls->desired_perf); - if (CPC_IN_PCC(desired_reg)) + /* + * Only write if min_perf and max_perf not zero. Some drivers pass zero + * value to min and max perf, but they don't mean to set the zero value, + * they just don't want to write to those registers. + */ + if (perf_ctrls->min_perf) + cpc_write(cpu, min_perf_reg, perf_ctrls->min_perf); + if (perf_ctrls->max_perf) + cpc_write(cpu, max_perf_reg, perf_ctrls->max_perf); + + if (CPC_IN_PCC(desired_reg) || CPC_IN_PCC(min_perf_reg) || CPC_IN_PCC(max_perf_reg)) up_read(&pcc_ss_data->pcc_lock); /* END Phase-I */ /* * This is Phase-II where we transfer the ownership of PCC to Platform @@ -1584,7 +1592,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) * case during a CMD_READ and if there are pending writes it delivers * the write command before servicing the read command */ - if (CPC_IN_PCC(desired_reg)) { + if (CPC_IN_PCC(desired_reg) || CPC_IN_PCC(min_perf_reg) || CPC_IN_PCC(max_perf_reg)) { if (down_write_trylock(&pcc_ss_data->pcc_lock)) {/* BEGIN Phase-II */ /* Update only if there are pending write commands */ if (pcc_ss_data->pending_pcc_write_cmd) From c984f5d5d45bd5f80d6a9d8541e809300c963aca Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:36 +0000 Subject: [PATCH 08/29] ACPI: CPPC: Add auto select register read/write support For some AMD shared memory based systems, the autonomous selection bit needed to be set explicitly. Add autonomous selection register related APIs to acpi driver, which amd_pstate driver uses later. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny [ rjw: Fixed up kerneldoc comments, white space adjustment, subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 96 ++++++++++++++++++++++++++++++++++++++++ include/acpi/cppc_acpi.h | 11 +++++ 2 files changed, 107 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 523bad4e3a0c..7ff269a78c20 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1433,6 +1433,102 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) } EXPORT_SYMBOL_GPL(cppc_set_epp_perf); +/** + * cppc_get_auto_sel_caps - Read autonomous selection register. + * @cpunum : CPU from which to read register. + * @perf_caps : struct where autonomous selection register value is updated. + */ +int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) +{ + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); + struct cpc_register_resource *auto_sel_reg; + u64 auto_sel; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpunum); + return -ENODEV; + } + + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + + if (!CPC_SUPPORTED(auto_sel_reg)) + pr_warn_once("Autonomous mode is not unsupported!\n"); + + if (CPC_IN_PCC(auto_sel_reg)) { + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = 0; + + if (pcc_ss_id < 0) + return -ENODEV; + + pcc_ss_data = pcc_data[pcc_ss_id]; + + down_write(&pcc_ss_data->pcc_lock); + + if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) { + cpc_read(cpunum, auto_sel_reg, &auto_sel); + perf_caps->auto_sel = (bool)auto_sel; + } else { + ret = -EIO; + } + + up_write(&pcc_ss_data->pcc_lock); + + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cppc_get_auto_sel_caps); + +/** + * cppc_set_auto_sel - Write autonomous selection register. + * @cpu : CPU to which to write register. + * @enable : the desired value of autonomous selection resiter to be updated. + */ +int cppc_set_auto_sel(int cpu, bool enable) +{ + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cpc_register_resource *auto_sel_reg; + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = -EINVAL; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + + if (CPC_IN_PCC(auto_sel_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id\n"); + return -ENODEV; + } + + if (CPC_SUPPORTED(auto_sel_reg)) { + ret = cpc_write(cpu, auto_sel_reg, enable); + if (ret) + return ret; + } + + pcc_ss_data = pcc_data[pcc_ss_id]; + + down_write(&pcc_ss_data->pcc_lock); + /* after writing CPC, transfer the ownership of PCC to platform */ + ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); + up_write(&pcc_ss_data->pcc_lock); + } else { + ret = -ENOTSUPP; + pr_debug("_CPC in PCC is not supported\n"); + } + + return ret; +} +EXPORT_SYMBOL_GPL(cppc_set_auto_sel); + /** * cppc_set_enable - Set to enable CPPC on the processor by writing the * Continuous Performance Control package EnableRegister field. diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 6b487a5bd638..6126c977ece0 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -109,6 +109,7 @@ struct cppc_perf_caps { u32 lowest_freq; u32 nominal_freq; u32 energy_perf; + bool auto_sel; }; struct cppc_perf_ctrls { @@ -153,6 +154,8 @@ extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); +extern int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps); +extern int cppc_set_auto_sel(int cpu, bool enable); #else /* !CONFIG_ACPI_CPPC_LIB */ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { @@ -214,6 +217,14 @@ static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf) { return -ENOTSUPP; } +static inline int cppc_set_auto_sel(int cpu, bool enable) +{ + return -ENOTSUPP; +} +static inline int cppc_get_auto_sel_caps(int cpunum, struct cppc_perf_caps *perf_caps) +{ + return -ENOTSUPP; +} #endif /* !CONFIG_ACPI_CPPC_LIB */ #endif /* _CPPC_ACPI_H*/ From 3e6e078057640751654bda9fd2278a944f61fa4a Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:37 +0000 Subject: [PATCH 09/29] Documentation: cpufreq: amd-pstate: Move amd_pstate param to alphabetical order Move amd_pstate command line param description to correct alphabetical order. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- .../admin-guide/kernel-parameters.txt | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6221a1d057dd..1e38f3ae0e62 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -339,6 +339,24 @@ This mode requires kvm-amd.avic=1. (Default when IOMMU HW support is present.) + amd_pstate= [X86] + disable + Do not enable amd_pstate as the default + scaling driver for the supported processors + passive + Use amd_pstate as a scaling driver, driver requests a + desired performance on this abstract scale and the power + management firmware translates the requests into actual + hardware states (core frequency, data fabric and memory + clocks etc.) + active + Use amd_pstate_epp driver instance as the scaling driver, + driver provides a hint to the hardware if software wants + to bias toward performance (0x0) or energy efficiency (0xff) + to the CPPC firmware. then CPPC power algorithm will + calculate the runtime workload and adjust the realtime cores + frequency. + amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: , @@ -7059,20 +7077,3 @@ xmon commands. off xmon is disabled. - amd_pstate= [X86] - disable - Do not enable amd_pstate as the default - scaling driver for the supported processors - passive - Use amd_pstate as a scaling driver, driver requests a - desired performance on this abstract scale and the power - management firmware translates the requests into actual - hardware states (core frequency, data fabric and memory - clocks etc.) - active - Use amd_pstate_epp driver instance as the scaling driver, - driver provides a hint to the hardware if software wants - to bias toward performance (0x0) or energy efficiency (0xff) - to the CPPC firmware. then CPPC power algorithm will - calculate the runtime workload and adjust the realtime cores - frequency. From 2dd6d0ebf74049256160a3d03dabbd92fe0b8599 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:38 +0000 Subject: [PATCH 10/29] cpufreq: amd-pstate: Add guided autonomous mode From ACPI spec below 3 modes for CPPC can be defined: 1. Non autonomous: OS scaling governor specifies operating frequency/ performance level through `Desired Performance` register and platform follows that. 2. Guided autonomous: OS scaling governor specifies min and max frequencies/ performance levels through `Minimum Performance` and `Maximum Performance` register, and platform can autonomously select an operating frequency in this range. 3. Fully autonomous: OS only hints (via EPP) to platform for the required energy performance preference for the workload and platform autonomously scales the frequency. Currently (1) is supported by amd_pstate as passive mode, and (3) is implemented by EPP support. This change is to support (2). In guided autonomous mode the min_perf is based on the input from the scaling governor. For example, in case of schedutil this value depends on the current utilization. And max_perf is set to max capacity. To activate guided auto mode ``amd_pstate=guided`` command line parameter has to be passed in the kernel. Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- .../admin-guide/kernel-parameters.txt | 15 +++++--- drivers/cpufreq/amd-pstate.c | 34 +++++++++++++++---- include/linux/amd-pstate.h | 2 ++ 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1e38f3ae0e62..1e5fe4891dd7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -344,11 +344,11 @@ Do not enable amd_pstate as the default scaling driver for the supported processors passive - Use amd_pstate as a scaling driver, driver requests a - desired performance on this abstract scale and the power - management firmware translates the requests into actual - hardware states (core frequency, data fabric and memory - clocks etc.) + Use amd_pstate with passive mode as a scaling driver. + In this mode autonomous selection is disabled. + Driver requests a desired performance level and platform + tries to match the same performance level if it is + satisfied by guaranteed performance level. active Use amd_pstate_epp driver instance as the scaling driver, driver provides a hint to the hardware if software wants @@ -356,6 +356,11 @@ to the CPPC firmware. then CPPC power algorithm will calculate the runtime workload and adjust the realtime cores frequency. + guided + Activate guided autonomous mode. Driver requests minimum and + maximum performance level and the platform autonomously + selects a performance level in this range and appropriate + to the current workload. amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 73c7643b2697..61dcd7b89b26 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -308,7 +308,22 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); - return 0; + if (cppc_state == AMD_PSTATE_ACTIVE) + return 0; + + ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf); + if (ret) { + pr_warn("failed to get auto_sel, ret: %d\n", ret); + return 0; + } + + ret = cppc_set_auto_sel(cpudata->cpu, + (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); + + if (ret) + pr_warn("failed to set auto_sel, ret: %d\n", ret); + + return ret; } DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf); @@ -385,12 +400,18 @@ static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) } static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, - u32 des_perf, u32 max_perf, bool fast_switch) + u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags) { u64 prev = READ_ONCE(cpudata->cppc_req_cached); u64 value = prev; des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); + + if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { + min_perf = des_perf; + des_perf = 0; + } + value &= ~AMD_CPPC_MIN_PERF(~0L); value |= AMD_CPPC_MIN_PERF(min_perf); @@ -445,7 +466,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy, cpufreq_freq_transition_begin(policy, &freqs); amd_pstate_update(cpudata, min_perf, des_perf, - max_perf, false); + max_perf, false, policy->governor->flags); cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -479,7 +500,8 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (max_perf < min_perf) max_perf = min_perf; - amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); + amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true, + policy->governor->flags); cpufreq_cpu_put(policy); } @@ -1279,7 +1301,7 @@ static int __init amd_pstate_init(void) /* capability check */ if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); - if (cppc_state == AMD_PSTATE_PASSIVE) + if (cppc_state != AMD_PSTATE_ACTIVE) current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; } else { pr_debug("AMD CPPC shared memory based functionality is supported\n"); @@ -1341,7 +1363,7 @@ static int __init amd_pstate_param(char *str) if (cppc_state == AMD_PSTATE_ACTIVE) current_pstate_driver = &amd_pstate_epp_driver; - if (cppc_state == AMD_PSTATE_PASSIVE) + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) current_pstate_driver = &amd_pstate_driver; return 0; diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index f5f22418e64b..c10ebf8c42e6 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -97,6 +97,7 @@ enum amd_pstate_mode { AMD_PSTATE_DISABLE = 0, AMD_PSTATE_PASSIVE, AMD_PSTATE_ACTIVE, + AMD_PSTATE_GUIDED, AMD_PSTATE_MAX, }; @@ -104,6 +105,7 @@ static const char * const amd_pstate_mode_string[] = { [AMD_PSTATE_DISABLE] = "disable", [AMD_PSTATE_PASSIVE] = "passive", [AMD_PSTATE_ACTIVE] = "active", + [AMD_PSTATE_GUIDED] = "guided", NULL, }; #endif /* _LINUX_AMD_PSTATE_H */ From 3ca7bc818d8ccf399cb0366d8d9a915c04a446f9 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:39 +0000 Subject: [PATCH 11/29] cpufreq: amd-pstate: Add guided mode control support via sysfs amd_pstate driver's `status` sysfs entry helps to control the driver's mode dynamically by user. After the addition of guided mode the combinations of mode transitions have been increased (16 combinations). Therefore optimise the amd_pstate_update_status function by implementing a state transition table. There are 4 states amd_pstate supports, namely: 'disable', 'passive', 'active', and 'guided'. The transition from any state to any other state is possible after this change. Sysfs interface: To disable amd_pstate driver: # echo disable > /sys/devices/system/cpu/amd_pstate/status To enable passive mode: # echo passive > /sys/devices/system/cpu/amd_pstate/status To change mode to active: # echo active > /sys/devices/system/cpu/amd_pstate/status To change mode to guided: # echo guided > /sys/devices/system/cpu/amd_pstate/status Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 143 +++++++++++++++++++++++++---------- 1 file changed, 101 insertions(+), 42 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 61dcd7b89b26..7955cfc91c31 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -106,6 +106,8 @@ static unsigned int epp_values[] = { [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE, }; +typedef int (*cppc_mode_transition_fn)(int); + static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -838,6 +840,98 @@ static ssize_t show_energy_performance_preference( return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); } +static void amd_pstate_driver_cleanup(void) +{ + amd_pstate_enable(false); + cppc_state = AMD_PSTATE_DISABLE; + current_pstate_driver = NULL; +} + +static int amd_pstate_register_driver(int mode) +{ + int ret; + + if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED) + current_pstate_driver = &amd_pstate_driver; + else if (mode == AMD_PSTATE_ACTIVE) + current_pstate_driver = &amd_pstate_epp_driver; + else + return -EINVAL; + + cppc_state = mode; + ret = cpufreq_register_driver(current_pstate_driver); + if (ret) { + amd_pstate_driver_cleanup(); + return ret; + } + return 0; +} + +static int amd_pstate_unregister_driver(int dummy) +{ + cpufreq_unregister_driver(current_pstate_driver); + amd_pstate_driver_cleanup(); + return 0; +} + +static int amd_pstate_change_mode_without_dvr_change(int mode) +{ + int cpu = 0; + + cppc_state = mode; + + if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE) + return 0; + + for_each_present_cpu(cpu) { + cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1); + } + + return 0; +} + +static int amd_pstate_change_driver_mode(int mode) +{ + int ret; + + ret = amd_pstate_unregister_driver(0); + if (ret) + return ret; + + ret = amd_pstate_register_driver(mode); + if (ret) + return ret; + + return 0; +} + +cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { + [AMD_PSTATE_DISABLE] = { + [AMD_PSTATE_DISABLE] = NULL, + [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver, + [AMD_PSTATE_ACTIVE] = amd_pstate_register_driver, + [AMD_PSTATE_GUIDED] = amd_pstate_register_driver, + }, + [AMD_PSTATE_PASSIVE] = { + [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, + [AMD_PSTATE_PASSIVE] = NULL, + [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, + [AMD_PSTATE_GUIDED] = amd_pstate_change_mode_without_dvr_change, + }, + [AMD_PSTATE_ACTIVE] = { + [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, + [AMD_PSTATE_PASSIVE] = amd_pstate_change_driver_mode, + [AMD_PSTATE_ACTIVE] = NULL, + [AMD_PSTATE_GUIDED] = amd_pstate_change_driver_mode, + }, + [AMD_PSTATE_GUIDED] = { + [AMD_PSTATE_DISABLE] = amd_pstate_unregister_driver, + [AMD_PSTATE_PASSIVE] = amd_pstate_change_mode_without_dvr_change, + [AMD_PSTATE_ACTIVE] = amd_pstate_change_driver_mode, + [AMD_PSTATE_GUIDED] = NULL, + }, +}; + static ssize_t amd_pstate_show_status(char *buf) { if (!current_pstate_driver) @@ -846,57 +940,22 @@ static ssize_t amd_pstate_show_status(char *buf) return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]); } -static void amd_pstate_driver_cleanup(void) -{ - current_pstate_driver = NULL; -} - static int amd_pstate_update_status(const char *buf, size_t size) { - int ret = 0; int mode_idx; - if (size > 7 || size < 6) + if (size > strlen("passive") || size < strlen("active")) return -EINVAL; + mode_idx = get_mode_idx_from_str(buf, size); - switch(mode_idx) { - case AMD_PSTATE_DISABLE: - if (!current_pstate_driver) - return -EINVAL; - if (cppc_state == AMD_PSTATE_ACTIVE) - return -EBUSY; - cpufreq_unregister_driver(current_pstate_driver); - amd_pstate_driver_cleanup(); - break; - case AMD_PSTATE_PASSIVE: - if (current_pstate_driver) { - if (current_pstate_driver == &amd_pstate_driver) - return 0; - cpufreq_unregister_driver(current_pstate_driver); - cppc_state = AMD_PSTATE_PASSIVE; - current_pstate_driver = &amd_pstate_driver; - } + if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX) + return -EINVAL; - ret = cpufreq_register_driver(current_pstate_driver); - break; - case AMD_PSTATE_ACTIVE: - if (current_pstate_driver) { - if (current_pstate_driver == &amd_pstate_epp_driver) - return 0; - cpufreq_unregister_driver(current_pstate_driver); - current_pstate_driver = &amd_pstate_epp_driver; - cppc_state = AMD_PSTATE_ACTIVE; - } + if (mode_state_machine[cppc_state][mode_idx]) + return mode_state_machine[cppc_state][mode_idx](mode_idx); - ret = cpufreq_register_driver(current_pstate_driver); - break; - default: - ret = -EINVAL; - break; - } - - return ret; + return 0; } static ssize_t show_status(struct kobject *kobj, From 7a9dec665f6875f61fe0c2e71b25842daade5110 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Tue, 7 Mar 2023 11:27:40 +0000 Subject: [PATCH 12/29] Documentation: cpufreq: amd-pstate: Update amd_pstate status sysfs for guided Update amd_pstate status sysfs for guided mode. Acked-by: Huang Rui Reviewed-by: Bagas Sanjaya Reviewed-by: Mario Limonciello Tested-by: Oleksandr Natalenko Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/amd-pstate.rst | 31 ++++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 6e5298b521b1..1cf40f69278c 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -303,13 +303,18 @@ efficiency frequency management method on AMD processors. AMD Pstate Driver Operation Modes ================================= -``amd_pstate`` CPPC has two operation modes: CPPC Autonomous(active) mode and -CPPC non-autonomous(passive) mode. -active mode and passive mode can be chosen by different kernel parameters. -When in Autonomous mode, CPPC ignores requests done in the Desired Performance -Target register and takes into account only the values set to the Minimum requested -performance, Maximum requested performance, and Energy Performance Preference -registers. When Autonomous is disabled, it only considers the Desired Performance Target. +``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode, +non-autonomous (passive) mode and guided autonomous (guided) mode. +Active/passive/guided mode can be chosen by different kernel parameters. + +- In autonomous mode, platform ignores the desired performance level request + and takes into account only the values set to the minimum, maximum and energy + performance preference registers. +- In non-autonomous mode, platform gets desired performance level + from OS directly through Desired Performance Register. +- In guided-autonomous mode, platform sets operating performance level + autonomously according to the current workload and within the limits set by + OS through min and max performance registers. Active Mode ------------ @@ -338,6 +343,15 @@ to the Performance Reduction Tolerance register. Above the nominal performance l processor must provide at least nominal performance requested and go higher if current operating conditions allow. +Guided Mode +----------- + +``amd_pstate=guided`` + +If ``amd_pstate=guided`` is passed to kernel command line option then this mode +is activated. In this mode, driver requests minimum and maximum performance +level and the platform autonomously selects a performance level in this range +and appropriate to the current workload. User Space Interface in ``sysfs`` - General =========================================== @@ -358,6 +372,9 @@ control its functionality at the system level. They are located in the "passive" The driver is functional and in the ``passive mode`` + "guided" + The driver is functional and in the ``guided mode`` + "disable" The driver is unregistered and not functional now. From 877d5cd2aeed8fddeb3afe174c6b9eb77cd56f8b Mon Sep 17 00:00:00 2001 From: qinyu Date: Mon, 20 Mar 2023 16:17:02 +0800 Subject: [PATCH 13/29] cpufreq: warn about invalid vals to scaling_max/min_freq interfaces When echo an invalid val to scaling_min_freq: > echo 123abc123 > scaling_min_freq It looks weird to have a return val of 0: > echo $? > 0 Sane people won't echo strings like that into these interfaces but fuzz tests may do. Also, maybe it's better to inform people if input is invalid. After this: > echo 123abc123 > scaling_min_freq > -bash: echo: write error: Invalid argument Signed-off-by: qinyu Tested-by: zhangxiaofeng Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6d8fd3b8dcb5..d61f7308f63c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -725,9 +725,9 @@ static ssize_t store_##file_name \ unsigned long val; \ int ret; \ \ - ret = sscanf(buf, "%lu", &val); \ - if (ret != 1) \ - return -EINVAL; \ + ret = kstrtoul(buf, 0, &val); \ + if (ret) \ + return ret; \ \ ret = freq_qos_update_request(policy->object##_freq_req, val);\ return ret >= 0 ? count : ret; \ From 8ffdb1fe9121f85232171630a8cfb0574cbf81b2 Mon Sep 17 00:00:00 2001 From: Jingyu Wang Date: Wed, 8 Mar 2023 20:28:30 +0800 Subject: [PATCH 14/29] cpufreq: Fix typo in the ARM_BRCMSTB_AVS_CPUFREQ Kconfig entry Delete the redundant word 'to' from the help text in the ARM_BRCMSTB_AVS_CPUFREQ Kconfig entry. Signed-off-by: Jingyu Wang [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 97acaa2136fd..123b4bbfcfee 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -95,7 +95,7 @@ config ARM_BRCMSTB_AVS_CPUFREQ help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides - a standard CPUfreq interface to to the firmware. + a standard CPUfreq interface to the firmware. Say Y, if you have a Broadcom SoC with AVS support for DFS or DVFS. From 175c9df15aef7a1446fb67154a186b73dd892b50 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:03 -0600 Subject: [PATCH 15/29] cpufreq: pmac32: Use of_property_read_bool() for boolean properties It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. Convert reading boolean properties to to of_property_read_bool(). Signed-off-by: Rob Herring Acked-by: Viresh Kumar Reviewed-by: Michael Ellerman Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/pmac32-cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 4b8ee2014da6..7ec6d1bb4592 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -546,7 +546,7 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) { struct device_node *volt_gpio_np; - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) + if (!of_property_read_bool(cpunode, "dynamic-power-step")) return 1; volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); @@ -576,7 +576,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode) u32 pvr; const u32 *value; - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) + if (!of_property_read_bool(cpunode, "dynamic-power-step")) return 1; hi_freq = cur_freq; @@ -632,7 +632,7 @@ static int __init pmac_cpufreq_setup(void) /* Check for 7447A based MacRISC3 */ if (of_machine_is_compatible("MacRISC3") && - of_get_property(cpunode, "dynamic-power-step", NULL) && + of_property_read_bool(cpunode, "dynamic-power-step") && PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { pmac_cpufreq_init_7447A(cpunode); From d51c63230994f167126d9d8381011b4cb2b0ad22 Mon Sep 17 00:00:00 2001 From: Jia-Wei Chang Date: Fri, 24 Mar 2023 18:11:27 +0800 Subject: [PATCH 16/29] cpufreq: mediatek: fix passing zero to 'PTR_ERR' In order to prevent passing zero to 'PTR_ERR' in mtk_cpu_dvfs_info_init(), we fix the return value of of_get_cci() using error pointer by explicitly casting error number. Signed-off-by: Jia-Wei Chang Fixes: 0daa47325bae ("cpufreq: mediatek: Link CCI device to CPU") Reported-by: Dan Carpenter Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 7f2680bc9a0f..01d949707c37 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -373,13 +373,13 @@ static struct device *of_get_cci(struct device *cpu_dev) struct platform_device *pdev; np = of_parse_phandle(cpu_dev->of_node, "mediatek,cci", 0); - if (IS_ERR_OR_NULL(np)) - return NULL; + if (!np) + return ERR_PTR(-ENODEV); pdev = of_find_device_by_node(np); of_node_put(np); - if (IS_ERR_OR_NULL(pdev)) - return NULL; + if (!pdev) + return ERR_PTR(-ENODEV); return &pdev->dev; } @@ -401,7 +401,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) info->ccifreq_bound = false; if (info->soc_data->ccifreq_supported) { info->cci_dev = of_get_cci(info->cpu_dev); - if (IS_ERR_OR_NULL(info->cci_dev)) { + if (IS_ERR(info->cci_dev)) { ret = PTR_ERR(info->cci_dev); dev_err(cpu_dev, "cpu%d: failed to get cci device\n", cpu); return -ENODEV; From d51e106240bc755cbe59634b70d567c192b045b2 Mon Sep 17 00:00:00 2001 From: Jia-Wei Chang Date: Fri, 24 Mar 2023 18:11:28 +0800 Subject: [PATCH 17/29] cpufreq: mediatek: fix KP caused by handler usage after regulator_put/clk_put Any kind of failure in mtk_cpu_dvfs_info_init() will lead to calling regulator_put() or clk_put() and the KP will occur since the regulator/clk handlers are used after released in mtk_cpu_dvfs_info_release(). To prevent the usage after regulator_put()/clk_put(), the regulator/clk handlers are addressed in a way of "Free the Last Thing Style". Signed-off-by: Jia-Wei Chang Fixes: 4b9ceb757bbb ("cpufreq: mediatek: Enable clocks and regulators") Suggested-by: AngeloGioacchino Del Regno Suggested-by: Dan Carpenter Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 62 +++++++++++++++--------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 01d949707c37..6dc225546a8d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -420,7 +420,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) ret = PTR_ERR(info->inter_clk); dev_err_probe(cpu_dev, ret, "cpu%d: failed to get intermediate clk\n", cpu); - goto out_free_resources; + goto out_free_mux_clock; } info->proc_reg = regulator_get_optional(cpu_dev, "proc"); @@ -428,13 +428,13 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) ret = PTR_ERR(info->proc_reg); dev_err_probe(cpu_dev, ret, "cpu%d: failed to get proc regulator\n", cpu); - goto out_free_resources; + goto out_free_inter_clock; } ret = regulator_enable(info->proc_reg); if (ret) { dev_warn(cpu_dev, "cpu%d: failed to enable vproc\n", cpu); - goto out_free_resources; + goto out_free_proc_reg; } /* Both presence and absence of sram regulator are valid cases. */ @@ -442,14 +442,14 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) if (IS_ERR(info->sram_reg)) { ret = PTR_ERR(info->sram_reg); if (ret == -EPROBE_DEFER) - goto out_free_resources; + goto out_disable_proc_reg; info->sram_reg = NULL; } else { ret = regulator_enable(info->sram_reg); if (ret) { dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu); - goto out_free_resources; + goto out_free_sram_reg; } } @@ -458,13 +458,13 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) if (ret) { dev_err(cpu_dev, "cpu%d: failed to get OPP-sharing information\n", cpu); - goto out_free_resources; + goto out_disable_sram_reg; } ret = dev_pm_opp_of_cpumask_add_table(&info->cpus); if (ret) { dev_warn(cpu_dev, "cpu%d: no OPP table\n", cpu); - goto out_free_resources; + goto out_disable_sram_reg; } ret = clk_prepare_enable(info->cpu_clk); @@ -533,43 +533,41 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) out_free_opp_table: dev_pm_opp_of_cpumask_remove_table(&info->cpus); -out_free_resources: - if (regulator_is_enabled(info->proc_reg)) - regulator_disable(info->proc_reg); - if (info->sram_reg && regulator_is_enabled(info->sram_reg)) +out_disable_sram_reg: + if (info->sram_reg) regulator_disable(info->sram_reg); - if (!IS_ERR(info->proc_reg)) - regulator_put(info->proc_reg); - if (!IS_ERR(info->sram_reg)) +out_free_sram_reg: + if (info->sram_reg) regulator_put(info->sram_reg); - if (!IS_ERR(info->cpu_clk)) - clk_put(info->cpu_clk); - if (!IS_ERR(info->inter_clk)) - clk_put(info->inter_clk); + +out_disable_proc_reg: + regulator_disable(info->proc_reg); + +out_free_proc_reg: + regulator_put(info->proc_reg); + +out_free_inter_clock: + clk_put(info->inter_clk); + +out_free_mux_clock: + clk_put(info->cpu_clk); return ret; } static void mtk_cpu_dvfs_info_release(struct mtk_cpu_dvfs_info *info) { - if (!IS_ERR(info->proc_reg)) { - regulator_disable(info->proc_reg); - regulator_put(info->proc_reg); - } - if (!IS_ERR(info->sram_reg)) { + regulator_disable(info->proc_reg); + regulator_put(info->proc_reg); + if (info->sram_reg) { regulator_disable(info->sram_reg); regulator_put(info->sram_reg); } - if (!IS_ERR(info->cpu_clk)) { - clk_disable_unprepare(info->cpu_clk); - clk_put(info->cpu_clk); - } - if (!IS_ERR(info->inter_clk)) { - clk_disable_unprepare(info->inter_clk); - clk_put(info->inter_clk); - } - + clk_disable_unprepare(info->cpu_clk); + clk_put(info->cpu_clk); + clk_disable_unprepare(info->inter_clk); + clk_put(info->inter_clk); dev_pm_opp_of_cpumask_remove_table(&info->cpus); dev_pm_opp_unregister_notifier(info->cpu_dev, &info->opp_nb); } From d3296bb4cafd4bad4a5cf2eeab9d19cc94f9e30e Mon Sep 17 00:00:00 2001 From: Jia-Wei Chang Date: Fri, 24 Mar 2023 18:11:29 +0800 Subject: [PATCH 18/29] cpufreq: mediatek: raise proc/sram max voltage for MT8516 Since the upper boundary of proc/sram voltage of MT8516 is 1300 mV, which is greater than the value of MT2701 1150 mV, we fix it by adding the corresponding platform data and specify proc/sram_max_volt to support MT8516. Signed-off-by: Jia-Wei Chang Fixes: ead858bd128d ("cpufreq: mediatek: Move voltage limits to platform data") Fixes: 6a17b3876bc8 ("cpufreq: mediatek: Refine mtk_cpufreq_voltage_tracking()") Reported-by: Nick Hainke Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 6dc225546a8d..764e4fbdd536 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -711,20 +711,29 @@ static const struct mtk_cpufreq_platform_data mt8186_platform_data = { .ccifreq_supported = true, }; +static const struct mtk_cpufreq_platform_data mt8516_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1310000, + .sram_min_volt = 0, + .sram_max_volt = 1310000, + .ccifreq_supported = false, +}; + /* List of machines supported by this driver */ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt2701", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt2712", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt7622", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt7623", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt8167", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8167", .data = &mt8516_platform_data }, { .compatible = "mediatek,mt817x", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8173", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8176", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8183", .data = &mt8183_platform_data }, { .compatible = "mediatek,mt8186", .data = &mt8186_platform_data }, { .compatible = "mediatek,mt8365", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt8516", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt8516", .data = &mt8516_platform_data }, { } }; MODULE_DEVICE_TABLE(of, mtk_cpufreq_machines); From 0883426fd07e39355362e3f2eb9aee1a154dcaf6 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 24 Mar 2023 18:11:30 +0800 Subject: [PATCH 19/29] cpufreq: mediatek: Raise proc and sram max voltage for MT7622/7623 During the addition of SRAM voltage tracking for CCI scaling, this driver got some voltage limits set for the vtrack algorithm: these were moved to platform data first, then enforced in a later commit 6a17b3876bc8 ("cpufreq: mediatek: Refine mtk_cpufreq_voltage_tracking()") using these as max values for the regulator_set_voltage() calls. In this case, the vsram/vproc constraints for MT7622 and MT7623 were supposed to be the same as MT2701 (and a number of other SoCs), but that turned out to be a mistake because the aforementioned two SoCs' maximum voltage for both VPROC and VPROC_SRAM is 1.36V. Fix that by adding new platform data for MT7622/7623 declaring the right {proc,sram}_max_volt parameter. Fixes: ead858bd128d ("cpufreq: mediatek: Move voltage limits to platform data") Fixes: 6a17b3876bc8 ("cpufreq: mediatek: Refine mtk_cpufreq_voltage_tracking()") Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Jia-Wei Chang Signed-off-by: Viresh Kumar --- drivers/cpufreq/mediatek-cpufreq.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 764e4fbdd536..9a39a7ccfae9 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -693,6 +693,15 @@ static const struct mtk_cpufreq_platform_data mt2701_platform_data = { .ccifreq_supported = false, }; +static const struct mtk_cpufreq_platform_data mt7622_platform_data = { + .min_volt_shift = 100000, + .max_volt_shift = 200000, + .proc_max_volt = 1360000, + .sram_min_volt = 0, + .sram_max_volt = 1360000, + .ccifreq_supported = false, +}; + static const struct mtk_cpufreq_platform_data mt8183_platform_data = { .min_volt_shift = 100000, .max_volt_shift = 200000, @@ -724,8 +733,8 @@ static const struct mtk_cpufreq_platform_data mt8516_platform_data = { static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt2701", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt2712", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt7622", .data = &mt2701_platform_data }, - { .compatible = "mediatek,mt7623", .data = &mt2701_platform_data }, + { .compatible = "mediatek,mt7622", .data = &mt7622_platform_data }, + { .compatible = "mediatek,mt7623", .data = &mt7622_platform_data }, { .compatible = "mediatek,mt8167", .data = &mt8516_platform_data }, { .compatible = "mediatek,mt817x", .data = &mt2701_platform_data }, { .compatible = "mediatek,mt8173", .data = &mt2701_platform_data }, From ba5e770c9698782bc203bbf5cf3b36a77720bdbe Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 23 Mar 2023 18:40:26 +0100 Subject: [PATCH 20/29] cpufreq: qcom-cpufreq-hw: fix double IO unmap and resource release on exit Commit 054a3ef683a1 ("cpufreq: qcom-hw: Allocate qcom_cpufreq_data during probe") moved getting memory resource and iomap from qcom_cpufreq_hw_cpu_init() to the probe function, however it left untouched cleanup in qcom_cpufreq_hw_cpu_exit(). During device unbind this will lead to doule release of resource and double iounmap(), first by qcom_cpufreq_hw_cpu_exit() and second via managed resources: resource: Trying to free nonexistent resource <0x0000000018593000-0x0000000018593fff> Trying to vunmap() nonexistent vm area (0000000088a7d4dc) ... vunmap (mm/vmalloc.c:2771 (discriminator 1)) iounmap (mm/ioremap.c:60) devm_ioremap_release (lib/devres.c:19) devres_release_all (drivers/base/devres.c:506 drivers/base/devres.c:535) device_unbind_cleanup (drivers/base/dd.c:523) device_release_driver_internal (drivers/base/dd.c:1248 drivers/base/dd.c:1263) device_driver_detach (drivers/base/dd.c:1300) unbind_store (drivers/base/bus.c:243) drv_attr_store (drivers/base/bus.c:127) sysfs_kf_write (fs/sysfs/file.c:137) kernfs_fop_write_iter (fs/kernfs/file.c:334) vfs_write (include/linux/fs.h:1851 fs/read_write.c:491 fs/read_write.c:584) ksys_write (fs/read_write.c:637) __arm64_sys_write (fs/read_write.c:646) invoke_syscall (arch/arm64/include/asm/current.h:19 arch/arm64/kernel/syscall.c:57) el0_svc_common.constprop.0 (arch/arm64/include/asm/daifflags.h:28 arch/arm64/kernel/syscall.c:150) do_el0_svc (arch/arm64/kernel/syscall.c:194) el0_svc (arch/arm64/include/asm/daifflags.h:28 arch/arm64/kernel/entry-common.c:133 arch/arm64/kernel/entry-common.c:142 arch/arm64/kernel/entry-common.c:638) el0t_64_sync_handler (arch/arm64/kernel/entry-common.c:656) el0t_64_sync (arch/arm64/kernel/entry.S:591) Fixes: 054a3ef683a1 ("cpufreq: qcom-hw: Allocate qcom_cpufreq_data during probe") Cc: Cc: Manivannan Sadhasivam Signed-off-by: Krzysztof Kozlowski Reviewed-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 575a4461c25a..b75e6157dc81 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -45,7 +45,6 @@ struct qcom_cpufreq_soc_data { struct qcom_cpufreq_data { void __iomem *base; - struct resource *res; /* * Mutex to synchronize between de-init sequence and re-starting LMh @@ -592,16 +591,12 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { struct device *cpu_dev = get_cpu_device(policy->cpu); struct qcom_cpufreq_data *data = policy->driver_data; - struct resource *res = data->res; - void __iomem *base = data->base; dev_pm_opp_remove_all_dynamic(cpu_dev); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); qcom_cpufreq_hw_lmh_exit(data); kfree(policy->freq_table); kfree(data); - iounmap(base); - release_mem_region(res->start, resource_size(res)); return 0; } @@ -704,17 +699,15 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) for (i = 0; i < num_domains; i++) { struct qcom_cpufreq_data *data = &qcom_cpufreq.data[i]; struct clk_init_data clk_init = {}; - struct resource *res; void __iomem *base; - base = devm_platform_get_and_ioremap_resource(pdev, i, &res); + base = devm_platform_ioremap_resource(pdev, i); if (IS_ERR(base)) { - dev_err(dev, "Failed to map resource %pR\n", res); + dev_err(dev, "Failed to map resource index %d\n", i); return PTR_ERR(base); } data->base = base; - data->res = res; /* Register CPU clock for each frequency domain */ clk_init.name = kasprintf(GFP_KERNEL, "qcom_cpufreq%d", i); From 417598f998520b1484a7cef2ac0563de7d975937 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 24 Mar 2023 15:06:24 +0100 Subject: [PATCH 21/29] cpufreq: Add SM7225 to cpufreq-dt-platdev blocklist The Qualcomm SM7225 platform uses the qcom-cpufreq-hw driver, so add it to the cpufreq-dt-platdev driver's blocklist. Signed-off-by: Luca Weiss Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 5ac6b9e5270e..452181434735 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -152,6 +152,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm6115", }, { .compatible = "qcom,sm6350", }, { .compatible = "qcom,sm6375", }, + { .compatible = "qcom,sm7225", }, { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, { .compatible = "qcom,sm8350", }, From 287143d886961194c23c98ebbde5c0fad39e12b8 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 8 Mar 2023 02:26:58 +0100 Subject: [PATCH 22/29] dt-bindings: cpufreq: cpufreq-qcom-hw: Allow just 1 frequency domain Some SoCs implementing CPUFREQ-HW only have a single frequency domain. Allow such case. Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Signed-off-by: Viresh Kumar --- .../devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 92693c33edf9..276e2871fc99 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -37,14 +37,14 @@ properties: - const: qcom,cpufreq-epss reg: - minItems: 2 + minItems: 1 items: - description: Frequency domain 0 register region - description: Frequency domain 1 register region - description: Frequency domain 2 register region reg-names: - minItems: 2 + minItems: 1 items: - const: freq-domain0 - const: freq-domain1 From 7ae24e054f753ff40ca1adaa631edd7476590a5f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 8 Mar 2023 02:26:59 +0100 Subject: [PATCH 23/29] dt-bindings: cpufreq: cpufreq-qcom-hw: Sanitize data per compatible Introduce per-SoC compatibles for OSM targets (read: pre-sm8250) and sanitize the number of interrupt{s,-names} and reg/-names per-compatible. Signed-off-by: Konrad Dybcio Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/cpufreq-qcom-hw.yaml | 90 ++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 276e2871fc99..b152fbe8a668 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -20,6 +20,12 @@ properties: oneOf: - description: v1 of CPUFREQ HW items: + - enum: + - qcom,sc7180-cpufreq-hw + - qcom,sdm845-cpufreq-hw + - qcom,sm6115-cpufreq-hw + - qcom,sm6350-cpufreq-hw + - qcom,sm8150-cpufreq-hw - const: qcom,cpufreq-hw - description: v2 of CPUFREQ HW (EPSS) @@ -86,6 +92,88 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,qdu1000-cpufreq-epss + - qcom,sc7180-cpufreq-hw + - qcom,sc8280xp-cpufreq-epss + - qcom,sdm845-cpufreq-hw + - qcom,sm6115-cpufreq-hw + - qcom,sm6350-cpufreq-hw + - qcom,sm6375-cpufreq-epss + then: + properties: + reg: + minItems: 2 + maxItems: 2 + + reg-names: + minItems: 2 + maxItems: 2 + + interrupts: + minItems: 2 + maxItems: 2 + + interrupt-names: + minItems: 2 + + - if: + properties: + compatible: + contains: + enum: + - qcom,sc7280-cpufreq-epss + - qcom,sm8250-cpufreq-epss + - qcom,sm8350-cpufreq-epss + - qcom,sm8450-cpufreq-epss + - qcom,sm8550-cpufreq-epss + then: + properties: + reg: + minItems: 3 + maxItems: 3 + + reg-names: + minItems: 3 + maxItems: 3 + + interrupts: + minItems: 3 + maxItems: 3 + + interrupt-names: + minItems: 3 + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8150-cpufreq-hw + then: + properties: + reg: + minItems: 3 + maxItems: 3 + + reg-names: + minItems: 3 + maxItems: 3 + + # On some SoCs the Prime core shares the LMH irq with Big cores + interrupts: + minItems: 2 + maxItems: 2 + + interrupt-names: + minItems: 2 + + examples: - | #include @@ -236,7 +324,7 @@ examples: #size-cells = <1>; cpufreq@17d43000 { - compatible = "qcom,cpufreq-hw"; + compatible = "qcom,sdm845-cpufreq-hw", "qcom,cpufreq-hw"; reg = <0x17d43000 0x1400>, <0x17d45800 0x1400>; reg-names = "freq-domain0", "freq-domain1"; From e69003202434131510ab18acda7d251b1975ecb1 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 8 Mar 2023 02:27:00 +0100 Subject: [PATCH 24/29] dt-bindings: cpufreq: cpufreq-qcom-hw: Add QCM2290 Document the OSM CPUFREQ_HW present on QCM2290, featuring just one lonely frequency domain. Signed-off-by: Konrad Dybcio Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- .../bindings/cpufreq/cpufreq-qcom-hw.yaml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index b152fbe8a668..a6b3bb8fdf33 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -21,6 +21,7 @@ properties: - description: v1 of CPUFREQ HW items: - enum: + - qcom,qcm2290-cpufreq-hw - qcom,sc7180-cpufreq-hw - qcom,sdm845-cpufreq-hw - qcom,sm6115-cpufreq-hw @@ -93,6 +94,29 @@ required: additionalProperties: false allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,qcm2290-cpufreq-hw + then: + properties: + reg: + minItems: 1 + maxItems: 1 + + reg-names: + minItems: 1 + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 1 + + interrupt-names: + minItems: 1 + - if: properties: compatible: From e2b47e585931a988c856fd4ba31e1296f749aee3 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 23 Mar 2023 15:33:43 -0700 Subject: [PATCH 25/29] cpufreq: qcom-cpufreq-hw: Revert adding cpufreq qos The OSM/EPSS hardware controls the frequency of each CPU cluster based on requests from the OS and various throttling events in the system. While throttling is in effect the related dcvs interrupt will be kept high. The purpose of the code handling this interrupt is to continuously report the thermal pressure based on the throttled frequency. The reasoning for adding QoS control to this mechanism is not entirely clear, but the introduction of commit 'c4c0efb06f17 ("cpufreq: qcom-cpufreq-hw: Add cpufreq qos for LMh")' causes the scaling_max_frequncy to be set to the throttled frequency. On the next iteration of polling, the throttled frequency is above or equal to the newly requested frequency, so the polling is stopped. With cpufreq limiting the max frequency, the hardware no longer report a throttling state and no further updates to thermal pressure or qos state are made. The result of this is that scaling_max_frequency can only go down, and the system becomes slower and slower every time a thermal throttling event is reported by the hardware. Even if the logic could be improved, there is no reason for software to limit the max freqency in response to the hardware limiting the max frequency. At best software will follow the reported hardware state, but typically it will cause slower backoff of the throttling. This reverts commit c4c0efb06f17fa4a37ad99e7752b18a5405c76dc. Fixes: c4c0efb06f17 ("cpufreq: qcom-cpufreq-hw: Add cpufreq qos for LMh") Reported-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Signed-off-by: Viresh Kumar --- drivers/cpufreq/qcom-cpufreq-hw.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index b75e6157dc81..eb54f7f17ab5 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -59,8 +58,6 @@ struct qcom_cpufreq_data { struct clk_hw cpu_clk; bool per_core_dcvs; - - struct freq_qos_request throttle_freq_req; }; static struct { @@ -350,8 +347,6 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) throttled_freq = freq_hz / HZ_PER_KHZ; - freq_qos_update_request(&data->throttle_freq_req, throttled_freq); - /* Update thermal pressure (the boost frequencies are accepted) */ arch_update_thermal_pressure(policy->related_cpus, throttled_freq); @@ -444,14 +439,6 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) if (data->throttle_irq < 0) return data->throttle_irq; - ret = freq_qos_add_request(&policy->constraints, - &data->throttle_freq_req, FREQ_QOS_MAX, - FREQ_QOS_MAX_DEFAULT_VALUE); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to add freq constraint (%d)\n", ret); - return ret; - } - data->cancel_throttle = false; data->policy = policy; @@ -518,7 +505,6 @@ static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) if (data->throttle_irq <= 0) return; - freq_qos_remove_request(&data->throttle_freq_req); free_irq(data->throttle_irq, data); } From a038895e25b296ca1ef0254f92673ea64bc1a2ee Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 4 Apr 2023 05:09:10 +0530 Subject: [PATCH 26/29] cpufreq: drivers with target_index() must set freq_table Since the cpufreq core directly uses freq_table, for cpufreq drivers that set their target_index() callback, make it mandatory for them to set the same. Since this is set per policy and normally from policy->init(), do this from cpufreq_table_validate_and_sort() which gets called right after ->init(). Reported-by: Yajun Deng Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 +++++ drivers/cpufreq/freq_table.c | 7 ++++++- include/linux/cpufreq.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d61f7308f63c..b24ed81ba8bd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -73,6 +73,11 @@ static inline bool has_target(void) return cpufreq_driver->target_index || cpufreq_driver->target; } +bool has_target_index(void) +{ + return !!cpufreq_driver->target_index; +} + /* internal prototypes */ static unsigned int __cpufreq_get(struct cpufreq_policy *policy); static int cpufreq_init_governor(struct cpufreq_policy *policy); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 67e56cf638ef..ddd4832bcc0d 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -355,8 +355,13 @@ int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy) { int ret; - if (!policy->freq_table) + if (!policy->freq_table) { + /* Freq table must be passed by drivers with target_index() */ + if (has_target_index()) + return -EINVAL; + return 0; + } ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table); if (ret) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 65623233ab2f..541013487a0e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -237,6 +237,7 @@ bool cpufreq_supports_freq_invariance(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); +bool has_target_index(void); #else static inline unsigned int cpufreq_get(unsigned int cpu) { From 11fa52fe619acfa945712d94a0bc27c0f5bc49de Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 3 Apr 2023 20:33:37 -0400 Subject: [PATCH 27/29] cpufreq: amd-pstate: Make varaiable mode_state_machine static smatch reports drivers/cpufreq/amd-pstate.c:907:25: warning: symbol 'mode_state_machine' was not declared. Should it be static? This variable is only used in one file so it should be static. Signed-off-by: Tom Rix Reviewed-by: Wyes Karny Tested-by: Wyes Karny Reviewed-by: Dhruva Gole [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 7955cfc91c31..fcb54a6f6598 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -905,7 +905,7 @@ static int amd_pstate_change_driver_mode(int mode) return 0; } -cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { +static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = { [AMD_PSTATE_DISABLE] = { [AMD_PSTATE_DISABLE] = NULL, [AMD_PSTATE_PASSIVE] = amd_pstate_register_driver, From f41e1442ac5bd687389d6104d7b74766db821eb3 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Tue, 11 Apr 2023 16:29:59 +0530 Subject: [PATCH 28/29] cpufreq: tegra194: add OPP support and set bandwidth Add support to use OPP table from DT in Tegra194 cpufreq driver. Tegra SoC's receive the frequency lookup table (LUT) from BPMP-FW. Cross check the OPP's present in DT against the LUT from BPMP-FW and enable only those DT OPP's which are present in LUT also. The OPP table in DT has CPU Frequency to bandwidth mapping where the bandwidth value is per MC channel. DRAM bandwidth depends on the number of MC channels which can vary as per the boot configuration. This per channel bandwidth from OPP table will be later converted by MC driver to final bandwidth value by multiplying with number of channels before sending the request to BPMP-FW. If OPP table is not present in DT, then use the LUT from BPMP-FW directy as the CPU frequency table and not do the DRAM frequency scaling which is same as the current behavior. Now, as the CPU Frequency table is being controlling through OPP table in DT. Keeping fewer entries in the table will create less frequency steps and can help to scale fast to high frequencies when required. Signed-off-by: Sumit Gupta Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra194-cpufreq.c | 156 ++++++++++++++++++++++++++--- 1 file changed, 143 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 5890e25d7f77..c8d03346068a 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -65,12 +66,36 @@ struct tegra_cpufreq_soc { struct tegra194_cpufreq_data { void __iomem *regs; - struct cpufreq_frequency_table **tables; + struct cpufreq_frequency_table **bpmp_luts; const struct tegra_cpufreq_soc *soc; + bool icc_dram_bw_scaling; }; static struct workqueue_struct *read_counters_wq; +static int tegra_cpufreq_set_bw(struct cpufreq_policy *policy, unsigned long freq_khz) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + struct dev_pm_opp *opp; + struct device *dev; + int ret; + + dev = get_cpu_device(policy->cpu); + if (!dev) + return -ENODEV; + + opp = dev_pm_opp_find_freq_exact(dev, freq_khz * KHZ, true); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + ret = dev_pm_opp_set_opp(dev, opp); + if (ret) + data->icc_dram_bw_scaling = false; + + dev_pm_opp_put(opp); + return ret; +} + static void tegra_get_cpu_mpidr(void *mpidr) { *((u64 *)mpidr) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; @@ -354,7 +379,7 @@ static unsigned int tegra194_get_speed(u32 cpu) * to the last written ndiv value from freq_table. This is * done to return consistent value. */ - cpufreq_for_each_valid_entry(pos, data->tables[clusterid]) { + cpufreq_for_each_valid_entry(pos, data->bpmp_luts[clusterid]) { if (pos->driver_data != ndiv) continue; @@ -369,16 +394,93 @@ static unsigned int tegra194_get_speed(u32 cpu) return rate; } +static int tegra_cpufreq_init_cpufreq_table(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *bpmp_lut, + struct cpufreq_frequency_table **opp_table) +{ + struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); + struct cpufreq_frequency_table *freq_table = NULL; + struct cpufreq_frequency_table *pos; + struct device *cpu_dev; + struct dev_pm_opp *opp; + unsigned long rate; + int ret, max_opps; + int j = 0; + + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("%s: failed to get cpu%d device\n", __func__, policy->cpu); + return -ENODEV; + } + + /* Initialize OPP table mentioned in operating-points-v2 property in DT */ + ret = dev_pm_opp_of_add_table_indexed(cpu_dev, 0); + if (!ret) { + max_opps = dev_pm_opp_get_opp_count(cpu_dev); + if (max_opps <= 0) { + dev_err(cpu_dev, "Failed to add OPPs\n"); + return max_opps; + } + + /* Disable all opps and cross-validate against LUT later */ + for (rate = 0; ; rate++) { + opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate); + if (IS_ERR(opp)) + break; + + dev_pm_opp_put(opp); + dev_pm_opp_disable(cpu_dev, rate); + } + } else { + dev_err(cpu_dev, "Invalid or empty opp table in device tree\n"); + data->icc_dram_bw_scaling = false; + return ret; + } + + freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL); + if (!freq_table) + return -ENOMEM; + + /* + * Cross check the frequencies from BPMP-FW LUT against the OPP's present in DT. + * Enable only those DT OPP's which are present in LUT also. + */ + cpufreq_for_each_valid_entry(pos, bpmp_lut) { + opp = dev_pm_opp_find_freq_exact(cpu_dev, pos->frequency * KHZ, false); + if (IS_ERR(opp)) + continue; + + ret = dev_pm_opp_enable(cpu_dev, pos->frequency * KHZ); + if (ret < 0) + return ret; + + freq_table[j].driver_data = pos->driver_data; + freq_table[j].frequency = pos->frequency; + j++; + } + + freq_table[j].driver_data = pos->driver_data; + freq_table[j].frequency = CPUFREQ_TABLE_END; + + *opp_table = &freq_table[0]; + + dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); + + return ret; +} + static int tegra194_cpufreq_init(struct cpufreq_policy *policy) { struct tegra194_cpufreq_data *data = cpufreq_get_driver_data(); int maxcpus_per_cluster = data->soc->maxcpus_per_cluster; + struct cpufreq_frequency_table *freq_table; + struct cpufreq_frequency_table *bpmp_lut; u32 start_cpu, cpu; u32 clusterid; + int ret; data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid); - - if (clusterid >= data->soc->num_clusters || !data->tables[clusterid]) + if (clusterid >= data->soc->num_clusters || !data->bpmp_luts[clusterid]) return -EINVAL; start_cpu = rounddown(policy->cpu, maxcpus_per_cluster); @@ -387,9 +489,22 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy) if (cpu_possible(cpu)) cpumask_set_cpu(cpu, policy->cpus); } - policy->freq_table = data->tables[clusterid]; policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY; + bpmp_lut = data->bpmp_luts[clusterid]; + + if (data->icc_dram_bw_scaling) { + ret = tegra_cpufreq_init_cpufreq_table(policy, bpmp_lut, &freq_table); + if (!ret) { + policy->freq_table = freq_table; + return 0; + } + } + + data->icc_dram_bw_scaling = false; + policy->freq_table = bpmp_lut; + pr_info("OPP tables missing from DT, EMC frequency scaling disabled\n"); + return 0; } @@ -406,6 +521,9 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, */ data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data); + if (data->icc_dram_bw_scaling) + tegra_cpufreq_set_bw(policy, tbl->frequency); + return 0; } @@ -439,8 +557,8 @@ static void tegra194_cpufreq_free_resources(void) } static struct cpufreq_frequency_table * -init_freq_table(struct platform_device *pdev, struct tegra_bpmp *bpmp, - unsigned int cluster_id) +tegra_cpufreq_bpmp_read_lut(struct platform_device *pdev, struct tegra_bpmp *bpmp, + unsigned int cluster_id) { struct cpufreq_frequency_table *freq_table; struct mrq_cpu_ndiv_limits_response resp; @@ -515,6 +633,7 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) const struct tegra_cpufreq_soc *soc; struct tegra194_cpufreq_data *data; struct tegra_bpmp *bpmp; + struct device *cpu_dev; int err, i; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); @@ -530,9 +649,9 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) return -EINVAL; } - data->tables = devm_kcalloc(&pdev->dev, data->soc->num_clusters, - sizeof(*data->tables), GFP_KERNEL); - if (!data->tables) + data->bpmp_luts = devm_kcalloc(&pdev->dev, data->soc->num_clusters, + sizeof(*data->bpmp_luts), GFP_KERNEL); + if (!data->bpmp_luts) return -ENOMEM; if (soc->actmon_cntr_base) { @@ -556,15 +675,26 @@ static int tegra194_cpufreq_probe(struct platform_device *pdev) } for (i = 0; i < data->soc->num_clusters; i++) { - data->tables[i] = init_freq_table(pdev, bpmp, i); - if (IS_ERR(data->tables[i])) { - err = PTR_ERR(data->tables[i]); + data->bpmp_luts[i] = tegra_cpufreq_bpmp_read_lut(pdev, bpmp, i); + if (IS_ERR(data->bpmp_luts[i])) { + err = PTR_ERR(data->bpmp_luts[i]); goto err_free_res; } } tegra194_cpufreq_driver.driver_data = data; + /* Check for optional OPPv2 and interconnect paths on CPU0 to enable ICC scaling */ + cpu_dev = get_cpu_device(0); + if (!cpu_dev) + return -EPROBE_DEFER; + + if (dev_pm_opp_of_get_opp_desc_node(cpu_dev)) { + err = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL); + if (!err) + data->icc_dram_bw_scaling = true; + } + err = cpufreq_register_driver(&tegra194_cpufreq_driver); if (!err) goto put_bpmp; From 44295af5019f1997d038ad2611086a2d1e2af167 Mon Sep 17 00:00:00 2001 From: Sanjay Chandrashekara Date: Tue, 18 Apr 2023 17:04:54 +0530 Subject: [PATCH 29/29] cpufreq: use correct unit when verify cur freq cpufreq_verify_current_freq checks() if the frequency returned by the hardware has a slight delta with the valid frequency value last set and returns "policy->cur" if the delta is within "1 MHz". In the comparison, "policy->cur" is in "kHz" but it's compared against HZ_PER_MHZ. So, the comparison range becomes "1 GHz". Fix this by comparing against KHZ_PER_MHZ instead of HZ_PER_MHZ. Fixes: f55ae08c8987 ("cpufreq: Avoid unnecessary frequency updates due to mismatch") Signed-off-by: Sanjay Chandrashekara [ sumit gupta: Commit message update ] Signed-off-by: Sumit Gupta Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b24ed81ba8bd..2ac905c637dd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1732,7 +1732,7 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b * MHz. In such cases it is better to avoid getting into * unnecessary frequency updates. */ - if (abs(policy->cur - new_freq) < HZ_PER_MHZ) + if (abs(policy->cur - new_freq) < KHZ_PER_MHZ) return policy->cur; cpufreq_out_of_sync(policy, new_freq);