From 91e8f835a7eda4ba2c0c4002a3108a0e3b22d34e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 30 Sep 2024 16:17:56 +0800 Subject: [PATCH 1/6] powercap: intel_rapl_tpmi: Fix bogus register reading The TPMI_RAPL_REG_DOMAIN_INFO value needs to be multiplied by 8 to get the register offset. Cc: All applicable Fixes: 903eb9fb85e3 ("powercap: intel_rapl_tpmi: Fix System Domain probing") Signed-off-by: Zhang Rui Link: https://patch.msgid.link/20240930081801.28502-2-rui.zhang@intel.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_tpmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c index 947544e4d229..7435df083ea4 100644 --- a/drivers/powercap/intel_rapl_tpmi.c +++ b/drivers/powercap/intel_rapl_tpmi.c @@ -181,7 +181,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset) pr_warn(FW_BUG "System domain must support Domain Info register\n"); return -ENODEV; } - tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO); + tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO * 8); if (!(tpmi_domain_info & TPMI_RAPL_DOMAIN_ROOT)) return 0; domain_type = RAPL_DOMAIN_PLATFORM; From 99ca0b57e49fb73624eede1c4396d9e3d10ccf14 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 30 Sep 2024 16:17:57 +0800 Subject: [PATCH 2/6] thermal: intel: int340x: processor: Fix warning during module unload The processor_thermal driver uses pcim_device_enable() to enable a PCI device, which means the device will be automatically disabled on driver detach. Thus there is no need to call pci_disable_device() again on it. With recent PCI device resource management improvements, e.g. commit f748a07a0b64 ("PCI: Remove legacy pcim_release()"), this problem is exposed and triggers the warining below. [ 224.010735] proc_thermal_pci 0000:00:04.0: disabling already-disabled device [ 224.010747] WARNING: CPU: 8 PID: 4442 at drivers/pci/pci.c:2250 pci_disable_device+0xe5/0x100 ... [ 224.010844] Call Trace: [ 224.010845] [ 224.010847] ? show_regs+0x6d/0x80 [ 224.010851] ? __warn+0x8c/0x140 [ 224.010854] ? pci_disable_device+0xe5/0x100 [ 224.010856] ? report_bug+0x1c9/0x1e0 [ 224.010859] ? handle_bug+0x46/0x80 [ 224.010862] ? exc_invalid_op+0x1d/0x80 [ 224.010863] ? asm_exc_invalid_op+0x1f/0x30 [ 224.010867] ? pci_disable_device+0xe5/0x100 [ 224.010869] ? pci_disable_device+0xe5/0x100 [ 224.010871] ? kfree+0x21a/0x2b0 [ 224.010873] pcim_disable_device+0x20/0x30 [ 224.010875] devm_action_release+0x16/0x20 [ 224.010878] release_nodes+0x47/0xc0 [ 224.010880] devres_release_all+0x9f/0xe0 [ 224.010883] device_unbind_cleanup+0x12/0x80 [ 224.010885] device_release_driver_internal+0x1ca/0x210 [ 224.010887] driver_detach+0x4e/0xa0 [ 224.010889] bus_remove_driver+0x6f/0xf0 [ 224.010890] driver_unregister+0x35/0x60 [ 224.010892] pci_unregister_driver+0x44/0x90 [ 224.010894] proc_thermal_pci_driver_exit+0x14/0x5f0 [processor_thermal_device_pci] ... [ 224.010921] ---[ end trace 0000000000000000 ]--- Remove the excess pci_disable_device() calls. Fixes: acd65d5d1cf4 ("thermal/drivers/int340x/processor_thermal: Add PCI MMIO based thermal driver") Signed-off-by: Zhang Rui Reviewed-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240930081801.28502-3-rui.zhang@intel.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- .../intel/int340x_thermal/processor_thermal_device_pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 006614921870..ba5d36d36fc4 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -416,7 +416,6 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_ if (!pci_info->no_legacy) proc_thermal_remove(proc_priv); proc_thermal_mmio_remove(pdev, proc_priv); - pci_disable_device(pdev); return ret; } @@ -438,7 +437,6 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev) proc_thermal_mmio_remove(pdev, pci_info->proc_priv); if (!pci_info->no_legacy) proc_thermal_remove(proc_priv); - pci_disable_device(pdev); } #ifdef CONFIG_PM_SLEEP From 1d390923974cc233245649cf23833e06b15a9ef7 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 30 Sep 2024 16:17:58 +0800 Subject: [PATCH 3/6] powercap: intel_rapl_tpmi: Ignore minor version change The hardware definition of every TPMI feature contains a major and minor version. When there is a change in the MMIO offset or change in the definition of a field, hardware will change major version. For addition of new fields without modifying existing MMIO offsets or fields, only the minor version is changed. If the driver has not been updated to recognize a new hardware major version, it cannot provide the RAPL interface to users due to possible register layout incompatibilities. However, the driver does not need to be updated every time the hardware minor version changes because in that case it will just miss some new functionality exposed by the hardware. The current implementation causes the driver to refuse to work for any hardware version change which is unnecessarily restrictive. If there is a minor version mismatch, log an information message and continue, but if there is a major version mismatch, log a warning and exit (as before). Signed-off-by: Zhang Rui Reviewed-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240930081801.28502-4-rui.zhang@intel.com Fixes: 9eef7f9da928 ("powercap: intel_rapl: Introduce RAPL TPMI interface driver") [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_tpmi.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c index 7435df083ea4..645fd1dc51a9 100644 --- a/drivers/powercap/intel_rapl_tpmi.c +++ b/drivers/powercap/intel_rapl_tpmi.c @@ -15,7 +15,8 @@ #include #include -#define TPMI_RAPL_VERSION 1 +#define TPMI_RAPL_MAJOR_VERSION 0 +#define TPMI_RAPL_MINOR_VERSION 1 /* 1 header + 10 registers + 5 reserved. 8 bytes for each. */ #define TPMI_RAPL_DOMAIN_SIZE 128 @@ -154,11 +155,21 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset) tpmi_domain_size = tpmi_domain_header >> 16 & 0xff; tpmi_domain_flags = tpmi_domain_header >> 32 & 0xffff; - if (tpmi_domain_version != TPMI_RAPL_VERSION) { - pr_warn(FW_BUG "Unsupported version:%d\n", tpmi_domain_version); + if (tpmi_domain_version == TPMI_VERSION_INVALID) { + pr_warn(FW_BUG "Invalid version\n"); return -ENODEV; } + if (TPMI_MAJOR_VERSION(tpmi_domain_version) != TPMI_RAPL_MAJOR_VERSION) { + pr_warn(FW_BUG "Unsupported major version:%ld\n", + TPMI_MAJOR_VERSION(tpmi_domain_version)); + return -ENODEV; + } + + if (TPMI_MINOR_VERSION(tpmi_domain_version) > TPMI_RAPL_MINOR_VERSION) + pr_info("Ignore: Unsupported minor version:%ld\n", + TPMI_MINOR_VERSION(tpmi_domain_version)); + /* Domain size: in unit of 128 Bytes */ if (tpmi_domain_size != 1) { pr_warn(FW_BUG "Invalid Domain size %d\n", tpmi_domain_size); From f517ff174ab79dd59f538a9aa2770cd3ee6dd48b Mon Sep 17 00:00:00 2001 From: Sumeet Pawnikar Date: Mon, 30 Sep 2024 16:17:59 +0800 Subject: [PATCH 4/6] powercap: intel_rapl_msr: Add PL4 support for Arrowlake-U Add PL4 support for ArrowLake-U platform. Signed-off-by: Sumeet Pawnikar Signed-off-by: Zhang Rui Reviewed-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240930081801.28502-5-rui.zhang@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 733a36f67fbc..1f4c5389676a 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -147,6 +147,7 @@ static const struct x86_cpu_id pl4_support_ids[] = { X86_MATCH_VFM(INTEL_RAPTORLAKE_P, NULL), X86_MATCH_VFM(INTEL_METEORLAKE, NULL), X86_MATCH_VFM(INTEL_METEORLAKE_L, NULL), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, NULL), {} }; From bfc6819e4bf56a55df6178f93241b5845ad672eb Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 30 Sep 2024 16:18:00 +0800 Subject: [PATCH 5/6] thermal: intel: int340x: processor: Remove MMIO RAPL CPU hotplug support CPU0/package0 is always online and the MMIO RAPL driver runs on single package systems only, so there is no need to handle CPU hotplug in it. Always register a RAPL package device for package 0 and remove the unnecessary CPU hotplug support. Signed-off-by: Zhang Rui Reviewed-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240930081801.28502-6-rui.zhang@intel.com [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- .../int340x_thermal/processor_thermal_rapl.c | 66 +++++++------------ 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c index e9aa9e23aab9..769510e748c0 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c @@ -19,42 +19,6 @@ static const struct rapl_mmio_regs rapl_mmio_default = { .limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2), }; -static int rapl_mmio_cpu_online(unsigned int cpu) -{ - struct rapl_package *rp; - - /* mmio rapl supports package 0 only for now */ - if (topology_physical_package_id(cpu)) - return 0; - - rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true); - if (!rp) { - rp = rapl_add_package_cpuslocked(cpu, &rapl_mmio_priv, true); - if (IS_ERR(rp)) - return PTR_ERR(rp); - } - cpumask_set_cpu(cpu, &rp->cpumask); - return 0; -} - -static int rapl_mmio_cpu_down_prep(unsigned int cpu) -{ - struct rapl_package *rp; - int lead_cpu; - - rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true); - if (!rp) - return 0; - - cpumask_clear_cpu(cpu, &rp->cpumask); - lead_cpu = cpumask_first(&rp->cpumask); - if (lead_cpu >= nr_cpu_ids) - rapl_remove_package_cpuslocked(rp); - else if (rp->lead_cpu == cpu) - rp->lead_cpu = lead_cpu; - return 0; -} - static int rapl_mmio_read_raw(int cpu, struct reg_action *ra) { if (!ra->reg.mmio) @@ -82,6 +46,7 @@ static int rapl_mmio_write_raw(int cpu, struct reg_action *ra) int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv) { const struct rapl_mmio_regs *rapl_regs = &rapl_mmio_default; + struct rapl_package *rp; enum rapl_domain_reg_id reg; enum rapl_domain_type domain; int ret; @@ -109,25 +74,38 @@ int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc return PTR_ERR(rapl_mmio_priv.control_type); } - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online", - rapl_mmio_cpu_online, rapl_mmio_cpu_down_prep); - if (ret < 0) { - powercap_unregister_control_type(rapl_mmio_priv.control_type); - rapl_mmio_priv.control_type = NULL; - return ret; + /* Register a RAPL package device for package 0 which is always online */ + rp = rapl_find_package_domain(0, &rapl_mmio_priv, false); + if (rp) { + ret = -EEXIST; + goto err; + } + + rp = rapl_add_package(0, &rapl_mmio_priv, false); + if (IS_ERR(rp)) { + ret = PTR_ERR(rp); + goto err; } - rapl_mmio_priv.pcap_rapl_online = ret; return 0; + +err: + powercap_unregister_control_type(rapl_mmio_priv.control_type); + rapl_mmio_priv.control_type = NULL; + return ret; } EXPORT_SYMBOL_GPL(proc_thermal_rapl_add); void proc_thermal_rapl_remove(void) { + struct rapl_package *rp; + if (IS_ERR_OR_NULL(rapl_mmio_priv.control_type)) return; - cpuhp_remove_state(rapl_mmio_priv.pcap_rapl_online); + rp = rapl_find_package_domain(0, &rapl_mmio_priv, false); + if (rp) + rapl_remove_package(rp); powercap_unregister_control_type(rapl_mmio_priv.control_type); } EXPORT_SYMBOL_GPL(proc_thermal_rapl_remove); From 3fb0eea8a1c4be5884e0731ea76cbd3ce126e1f3 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 30 Sep 2024 16:18:01 +0800 Subject: [PATCH 6/6] thermal: intel: int340x: processor: Add MMIO RAPL PL4 support Similar to the MSR RAPL interface, MMIO RAPL supports PL4 too, so add MMIO RAPL PL4d support to the processor_thermal driver. As a result, the powercap sysfs for MMIO RAPL will show a new "peak power" constraint. Signed-off-by: Zhang Rui Reviewed-by: Srinivas Pandruvada Link: https://patch.msgid.link/20240930081801.28502-7-rui.zhang@intel.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- .../thermal/intel/int340x_thermal/processor_thermal_rapl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c index 769510e748c0..bde2cc386afd 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c @@ -13,9 +13,9 @@ static struct rapl_if_priv rapl_mmio_priv; static const struct rapl_mmio_regs rapl_mmio_default = { .reg_unit = 0x5938, - .regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930}, + .regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930, 0x59b0}, .regs[RAPL_DOMAIN_DRAM] = { 0x58e0, 0x58e8, 0x58ec, 0, 0}, - .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2), + .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2) | BIT(POWER_LIMIT4), .limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2), };