From 5fad775d432c6c9158ea12e7e00d8922ef8d3dfc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jan 2025 14:37:29 +0100 Subject: [PATCH 01/10] PM: EM: Drop unused parameter from em_adjust_new_capacity() The max_cap parameter is never used in em_adjust_new_capacity(), so drop it. No functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/2369979.ElGaqSPkdT@rjwysocki.net --- kernel/power/energy_model.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 3874f0e97651..c79bf3c8b0f1 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -728,8 +728,7 @@ static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd, * are correctly calculated. */ static void em_adjust_new_capacity(struct device *dev, - struct em_perf_domain *pd, - u64 max_cap) + struct em_perf_domain *pd) { struct em_perf_table __rcu *em_table; @@ -800,7 +799,7 @@ static void em_check_capacity_update(void) cpu, cpu_capacity, em_max_perf); dev = get_cpu_device(cpu); - em_adjust_new_capacity(dev, pd, cpu_capacity); + em_adjust_new_capacity(dev, pd); } free_cpumask_var(cpu_done_mask); From a8e62726ac0dd7b610c87ba1a938a5a9091c34df Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Jan 2025 14:38:25 +0100 Subject: [PATCH 02/10] PM: EM: Slightly reduce em_check_capacity_update() overhead Every iteration of the loop over all possible CPUs in em_check_capacity_update() causes get_cpu_device() to be called twice for the same CPU, once indirectly via em_cpu_get() and once directly. Get rid of the indirect get_cpu_device() call by moving the direct invocation of it earlier and using em_pd_get() instead of em_cpu_get() to get a pd pointer for the dev one returned by it. This also exposes the fact that dev is needed to get a pd, so the code becomes somewhat easier to follow after it. No functional impact. 
Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/1925950.tdWV9SEqCh@rjwysocki.net --- kernel/power/energy_model.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index c79bf3c8b0f1..066bcf1c71a1 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -774,7 +774,8 @@ static void em_check_capacity_update(void) } cpufreq_cpu_put(policy); - pd = em_cpu_get(cpu); + dev = get_cpu_device(cpu); + pd = em_pd_get(dev); if (!pd || em_is_artificial(pd)) continue; @@ -798,7 +799,6 @@ static void em_check_capacity_update(void) pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n", cpu, cpu_capacity, em_max_perf); - dev = get_cpu_device(cpu); em_adjust_new_capacity(dev, pd); } From 258e231dc29fbd72bc82c16859a8304f71780ba2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 17 Feb 2025 21:03:01 +0100 Subject: [PATCH 03/10] PM: Rearrange documentation related to __pm_runtime_disable() There are only two callers of __pm_runtime_disable(), one of which is device_suspend_late() and the other is pm_runtime_disable() that has its own kerneldoc comment and there are no plans to add any more of them. Since they use different values of the __pm_runtime_disable() second parameter, the actual code behavior is different in each case, but it is all documented in the __pm_runtime_disable() kerneldoc comment which is not particularly straightforward. For this reason, move the information from the __pm_runtime_disable() kerneldoc comment to the pm_runtime_disable() one and into a separate comment in device_suspend_late() and remove the __pm_runtime_disable() kerneldoc comment altogether. No functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/12617588.O9o76ZdvQC@rjwysocki.net --- drivers/base/power/main.c | 4 ++++ drivers/base/power/runtime.c | 14 -------------- include/linux/pm_runtime.h | 15 +++++++++++---- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 40e1d8d8a589..dffa2aa1ba7d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1404,6 +1404,10 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool asyn TRACE_DEVICE(dev); TRACE_SUSPEND(0); + /* + * Disable runtime PM for the device without checking if there is a + * pending resume request for it. + */ __pm_runtime_disable(dev, false); dpm_wait_for_subordinate(dev, async); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 2ee45841486b..a5aed89e1a6b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1460,20 +1460,6 @@ int pm_runtime_barrier(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_barrier); -/** - * __pm_runtime_disable - Disable runtime PM of a device. - * @dev: Device to handle. - * @check_resume: If set, check if there's a resume request for the device. - * - * Increment power.disable_depth for the device and if it was zero previously, - * cancel all pending runtime PM requests for the device and wait for all - * operations in progress to complete. The device can be either active or - * suspended after its runtime PM has been disabled. - * - * If @check_resume is set and there's a resume request pending when - * __pm_runtime_disable() is called and power.disable_depth is zero, the - * function will wake up the device before disabling its runtime PM. 
- */ void __pm_runtime_disable(struct device *dev, bool check_resume) { spin_lock_irq(&dev->power.lock); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d39dc863f612..72c62e1171ca 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -556,11 +556,18 @@ static inline int pm_runtime_set_suspended(struct device *dev) * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * - * Prevent the runtime PM framework from working with @dev (by incrementing its - * "blocking" counter). + * Prevent the runtime PM framework from working with @dev by incrementing its + * "disable" counter. * - * For each invocation of this function for @dev there must be a matching - * pm_runtime_enable() call in order for runtime PM to be enabled for it. + * If the counter is zero when this function runs and there is a pending runtime + * resume request for @dev, it will be resumed. If the counter is still zero at + * that point, all of the pending runtime PM requests for @dev will be canceled + * and all runtime PM operations in progress involving it will be waited for to + * complete. + * + * For each invocation of this function for @dev, there must be a matching + * pm_runtime_enable() call, so that runtime PM is eventually enabled for it + * again. */ static inline void pm_runtime_disable(struct device *dev) { From 1618f635bdf56f3ac158171114e9bf18db234cbf Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 18 Feb 2025 16:20:21 +0800 Subject: [PATCH 04/10] PM: EM: use kfree_rcu() to simplify the code The callback function of call_rcu() just calls kfree(), so use kfree_rcu() instead of call_rcu() + callback function. Signed-off-by: Li RongQing Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/20250218082021.2766-1-lirongqing@baidu.com Signed-off-by: Rafael J. 
Wysocki --- kernel/power/energy_model.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 066bcf1c71a1..16f6dcafdb90 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -161,14 +161,6 @@ static void em_debug_create_pd(struct device *dev) {} static void em_debug_remove_pd(struct device *dev) {} #endif -static void em_destroy_table_rcu(struct rcu_head *rp) -{ - struct em_perf_table __rcu *table; - - table = container_of(rp, struct em_perf_table, rcu); - kfree(table); -} - static void em_release_table_kref(struct kref *kref) { struct em_perf_table __rcu *table; @@ -176,7 +168,7 @@ static void em_release_table_kref(struct kref *kref) /* It was the last owner of this table so we can free */ table = container_of(kref, struct em_perf_table, kref); - call_rcu(&table->rcu, em_destroy_table_rcu); + kfree_rcu(table, rcu); } /** From a29ba0023ddfb060473a0f55f2944ccd1c19b408 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 20 Feb 2025 11:40:33 +0000 Subject: [PATCH 05/10] MAINTAINERS: Add Energy Model framework as properly maintained The Energy Model framework has recently grown and become a bit more complex. Add the proper contact points to maintainers so other developers can get the right support. Signed-off-by: Lukasz Luba Link: https://patch.msgid.link/20250220114103.515278-1-lukasz.luba@arm.com Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f..2c106088bfbf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8518,6 +8518,15 @@ M: Maxim Levitsky S: Maintained F: drivers/media/rc/ene_ir.* +ENERGY MODEL +M: Lukasz Luba +M: "Rafael J. 
Wysocki" +L: linux-pm@vger.kernel.org +S: Maintained +F: kernel/power/energy_model.c +F: include/linux/energy_model.h +F: Documentation/power/energy-model.rst + EPAPR HYPERVISOR BYTE CHANNEL DEVICE DRIVER M: Laurentiu Tudor L: linuxppc-dev@lists.ozlabs.org From d2677d57d4b8ec63da8f51357bcf855c4cf47c78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Feb 2025 11:56:12 +0100 Subject: [PATCH 06/10] PM: runtime: Drop status check from pm_runtime_force_resume() Since pm_runtime_force_resume() requires pm_runtime_force_suspend() to be called before it on the same device, the runtime PM status of the device is RPM_SUSPENDED when it is called unless the device's runtime PM status is changed somewhere else in the meantime. However, even if that happens, the power.needs_force_resume check is still required to pass and that flag is only set by pm_runtime_force_suspend() once and it is cleared at the end of pm_runtime_force_resume(), so it cannot be taken into account twice in a row. According to the above, the pm_runtime_status_suspended(dev) check in pm_runtime_force_resume() is redundant, so drop it. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/2309120.iZASKD2KPV@rjwysocki.net --- drivers/base/power/runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a5aed89e1a6b..1714358b541e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1945,7 +1945,7 @@ int pm_runtime_force_resume(struct device *dev) int (*callback)(struct device *); int ret = 0; - if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) + if (!dev->power.needs_force_resume) goto out; /* From 72263869656d09a5f9727504bb1f3cb7b010f0e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 25 Feb 2025 18:06:18 +0100 Subject: [PATCH 07/10] PM: runtime: Unify error handling during suspend and resume There is a confusing difference in error handling between rpm_suspend() and rpm_resume() related to the special way in which -EAGAIN and -EBUSY error values are treated by the former. Also, converting -EACCES coming from the callback to I/O error, which it quite likely is not, may confuse runtime PM users. To address the above, modify rpm_callback() to convert -EACCES coming from the driver to -EAGAIN and to set power.runtime_error only if the return value is not -EAGAIN or -EBUSY. This will cause the error handling in rpm_resume() and rpm_suspend() to work consistently, so drop the no longer needed -EAGAIN or -EBUSY special case from the latter and make it retry autosuspend if power.runtime_error is unset. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/12620037.O9o76ZdvQC@rjwysocki.net --- drivers/base/power/runtime.c | 40 +++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 1714358b541e..da74e1c69f7a 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -448,8 +448,19 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) retval = __rpm_callback(cb, dev); } - dev->power.runtime_error = retval; - return retval != -EACCES ? retval : -EIO; + /* + * Since -EACCES means that runtime PM is disabled for the given device, + * it should not be returned by runtime PM callbacks. If it is returned + * nevertheless, assume it to be a transient error and convert it to + * -EAGAIN. 
+ */ + if (retval == -EACCES) + retval = -EAGAIN; + + if (retval != -EAGAIN && retval != -EBUSY) + dev->power.runtime_error = retval; + + return retval; } /** @@ -725,21 +736,18 @@ static int rpm_suspend(struct device *dev, int rpmflags) dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); - if (retval == -EAGAIN || retval == -EBUSY) { - dev->power.runtime_error = 0; + /* + * On transient errors, if the callback routine failed an autosuspend, + * and if the last_busy time has been updated so that there is a new + * autosuspend expiration time, automatically reschedule another + * autosuspend. + */ + if (!dev->power.runtime_error && (rpmflags & RPM_AUTO) && + pm_runtime_autosuspend_expiration(dev) != 0) + goto repeat; + + pm_runtime_cancel_pending(dev); - /* - * If the callback routine failed an autosuspend, and - * if the last_busy time has been updated so that there - * is a new autosuspend expiration time, automatically - * reschedule another autosuspend. - */ - if ((rpmflags & RPM_AUTO) && - pm_runtime_autosuspend_expiration(dev) != 0) - goto repeat; - } else { - pm_runtime_cancel_pending(dev); - } goto out; } From 860a731f52f83309c213b943bac8f4ea70a88805 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 5 Mar 2025 22:08:21 +0100 Subject: [PATCH 08/10] PM: EM: Constify two parameters of em_dev_register_perf_domain() Notice that em_dev_register_perf_domain() and the functions called by it do not update objects pointed to by its cb and cpus parameters, so the const modifier can be added to them. This allows the return value of cpumask_of() or a pointer to a struct em_data_callback declared as const to be passed to em_dev_register_perf_domain() directly without explicit type casting which is rather handy. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/4648962.LvFx2qVVIh@rjwysocki.net --- include/linux/energy_model.h | 8 ++++---- kernel/power/energy_model.c | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 78318d49276d..b23c8c798dac 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -169,8 +169,8 @@ struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_update_perf_domain(struct device *dev, struct em_perf_table __rcu *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts); + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); void em_table_free(struct em_perf_table __rcu *table); @@ -346,8 +346,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts) + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) { return -EINVAL; } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 16f6dcafdb90..1e3caa96c271 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -231,7 +231,7 @@ static void em_init_performance(struct device *dev, struct em_perf_domain *pd, } static int em_compute_costs(struct device *dev, struct em_perf_state *table, - struct em_data_callback *cb, int nr_states, + const struct em_data_callback *cb, int nr_states, unsigned long flags) { unsigned long prev_cost = ULONG_MAX; @@ -333,7 +333,7 @@ EXPORT_SYMBOL_GPL(em_dev_update_perf_domain); static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, struct em_perf_state *table, - struct 
em_data_callback *cb, + const struct em_data_callback *cb, unsigned long flags) { unsigned long power, freq, prev_freq = 0; @@ -388,7 +388,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, } static int em_create_pd(struct device *dev, int nr_states, - struct em_data_callback *cb, cpumask_t *cpus, + const struct em_data_callback *cb, + const cpumask_t *cpus, unsigned long flags) { struct em_perf_table __rcu *em_table; @@ -548,8 +549,8 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * Return 0 on success */ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *cpus, - bool microwatts) + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) { unsigned long cap, prev_cap = 0; unsigned long flags = 0; From 3ee7be9e10dd5f79448788b899591d4bd2bf0c19 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 6 Mar 2025 17:49:20 +0100 Subject: [PATCH 09/10] PM: EM: Address RCU-related sparse warnings The usage of __rcu in the Energy Model code is quite inconsistent which causes the following sparse warnings to trigger: kernel/power/energy_model.c:169:15: warning: incorrect type in assignment (different address spaces) kernel/power/energy_model.c:169:15: expected struct em_perf_table [noderef] __rcu *table kernel/power/energy_model.c:169:15: got struct em_perf_table * kernel/power/energy_model.c:171:9: warning: incorrect type in argument 1 (different address spaces) kernel/power/energy_model.c:171:9: expected struct callback_head *head kernel/power/energy_model.c:171:9: got struct callback_head [noderef] __rcu * kernel/power/energy_model.c:171:9: warning: cast removes address space '__rcu' of expression kernel/power/energy_model.c:182:19: warning: incorrect type in argument 1 (different address spaces) kernel/power/energy_model.c:182:19: expected struct kref *kref kernel/power/energy_model.c:182:19: got struct kref [noderef] __rcu * kernel/power/energy_model.c:200:15: 
warning: incorrect type in assignment (different address spaces) kernel/power/energy_model.c:200:15: expected struct em_perf_table [noderef] __rcu *table kernel/power/energy_model.c:200:15: got void *[assigned] _res kernel/power/energy_model.c:204:20: warning: incorrect type in argument 1 (different address spaces) kernel/power/energy_model.c:204:20: expected struct kref *kref kernel/power/energy_model.c:204:20: got struct kref [noderef] __rcu * kernel/power/energy_model.c:320:19: warning: incorrect type in argument 1 (different address spaces) kernel/power/energy_model.c:320:19: expected struct kref *kref kernel/power/energy_model.c:320:19: got struct kref [noderef] __rcu * kernel/power/energy_model.c:325:45: warning: incorrect type in argument 2 (different address spaces) kernel/power/energy_model.c:325:45: expected struct em_perf_state *table kernel/power/energy_model.c:325:45: got struct em_perf_state [noderef] __rcu * kernel/power/energy_model.c:425:45: warning: incorrect type in argument 3 (different address spaces) kernel/power/energy_model.c:425:45: expected struct em_perf_state *table kernel/power/energy_model.c:425:45: got struct em_perf_state [noderef] __rcu * kernel/power/energy_model.c:442:15: warning: incorrect type in argument 1 (different address spaces) kernel/power/energy_model.c:442:15: expected void const *objp kernel/power/energy_model.c:442:15: got struct em_perf_table [noderef] __rcu *[assigned] em_table kernel/power/energy_model.c:626:55: warning: incorrect type in argument 2 (different address spaces) kernel/power/energy_model.c:626:55: expected struct em_perf_state *table kernel/power/energy_model.c:626:55: got struct em_perf_state [noderef] __rcu * kernel/power/energy_model.c:681:16: warning: incorrect type in assignment (different address spaces) kernel/power/energy_model.c:681:16: expected struct em_perf_state *new_ps kernel/power/energy_model.c:681:16: got struct em_perf_state [noderef] __rcu * kernel/power/energy_model.c:699:37: 
warning: incorrect type in argument 2 (different address spaces) kernel/power/energy_model.c:699:37: expected struct em_perf_state *table kernel/power/energy_model.c:699:37: got struct em_perf_state [noderef] __rcu * kernel/power/energy_model.c:733:38: warning: incorrect type in argument 3 (different address spaces) kernel/power/energy_model.c:733:38: expected struct em_perf_state *table kernel/power/energy_model.c:733:38: got struct em_perf_state [noderef] __rcu * kernel/power/energy_model.c:855:53: warning: dereference of noderef expression kernel/power/energy_model.c:864:32: warning: dereference of noderef expression This is because the __rcu annotation for sparse is only applicable to pointers that need rcu_dereference() or equivalent for protection, which basically means pointers assigned with rcu_assign_pointer(). Make all of the above sparse warnings go away by cleaning up the usage of __rcu and using rcu_dereference_protected() where applicable. Cc: All applicable Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/5885405.DvuYhMxLoT@rjwysocki.net --- include/linux/energy_model.h | 12 +++++------ kernel/power/energy_model.c | 39 ++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index b23c8c798dac..ddd09debfc7d 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -167,13 +167,13 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table); + struct em_perf_table *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, const struct em_data_callback *cb, const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); -void em_table_free(struct em_perf_table __rcu *table); +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table *table); int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int nr_states); int em_dev_update_chip_binning(struct device *dev); @@ -373,14 +373,14 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) return 0; } static inline -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd) { return NULL; } -static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline void em_table_free(struct em_perf_table *table) {} static inline int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table) + struct em_perf_table *new_table) { return -EINVAL; } diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 1e3caa96c271..d9b7e2b38c7a 100644 --- 
a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -163,12 +163,8 @@ static void em_debug_remove_pd(struct device *dev) {} static void em_release_table_kref(struct kref *kref) { - struct em_perf_table __rcu *table; - /* It was the last owner of this table so we can free */ - table = container_of(kref, struct em_perf_table, kref); - - kfree_rcu(table, rcu); + kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu); } /** @@ -177,7 +173,7 @@ static void em_release_table_kref(struct kref *kref) * * No return values. */ -void em_table_free(struct em_perf_table __rcu *table) +void em_table_free(struct em_perf_table *table) { kref_put(&table->kref, em_release_table_kref); } @@ -190,9 +186,9 @@ void em_table_free(struct em_perf_table __rcu *table) * has a user. * Returns allocated table or NULL. */ -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd) { - struct em_perf_table __rcu *table; + struct em_perf_table *table; int table_size; table_size = sizeof(struct em_perf_state) * pd->nr_perf_states; @@ -300,9 +296,9 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, * Return 0 on success or an error code on failure. 
*/ int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table) + struct em_perf_table *new_table) { - struct em_perf_table __rcu *old_table; + struct em_perf_table *old_table; struct em_perf_domain *pd; if (!dev) @@ -319,7 +315,8 @@ int em_dev_update_perf_domain(struct device *dev, kref_get(&new_table->kref); - old_table = pd->em_table; + old_table = rcu_dereference_protected(pd->em_table, + lockdep_is_held(&em_pd_mutex)); rcu_assign_pointer(pd->em_table, new_table); em_cpufreq_update_efficiencies(dev, new_table->state); @@ -392,7 +389,7 @@ static int em_create_pd(struct device *dev, int nr_states, const cpumask_t *cpus, unsigned long flags) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; struct em_perf_domain *pd; struct device *cpu_dev; int cpu, ret, num_cpus; @@ -552,6 +549,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, const struct em_data_callback *cb, const cpumask_t *cpus, bool microwatts) { + struct em_perf_table *em_table; unsigned long cap, prev_cap = 0; unsigned long flags = 0; int cpu, ret; @@ -624,7 +622,9 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, dev->em_pd->min_perf_state = 0; dev->em_pd->max_perf_state = nr_states - 1; - em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state); + em_table = rcu_dereference_protected(dev->em_pd->em_table, + lockdep_is_held(&em_pd_mutex)); + em_cpufreq_update_efficiencies(dev, em_table->state); em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); @@ -661,7 +661,8 @@ void em_dev_unregister_perf_domain(struct device *dev) mutex_lock(&em_pd_mutex); em_debug_remove_pd(dev); - em_table_free(dev->em_pd->em_table); + em_table_free(rcu_dereference_protected(dev->em_pd->em_table, + lockdep_is_held(&em_pd_mutex))); kfree(dev->em_pd); dev->em_pd = NULL; @@ -669,9 +670,9 @@ void em_dev_unregister_perf_domain(struct device *dev) } EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain); 
-static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd) +static struct em_perf_table *em_table_dup(struct em_perf_domain *pd) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; struct em_perf_state *ps, *new_ps; int ps_size; @@ -693,7 +694,7 @@ static struct em_perf_table __rcu *em_table_dup(struct em_perf_domain *pd) } static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd, - struct em_perf_table __rcu *em_table) + struct em_perf_table *em_table) { int ret; @@ -723,7 +724,7 @@ static int em_recalc_and_update(struct device *dev, struct em_perf_domain *pd, static void em_adjust_new_capacity(struct device *dev, struct em_perf_domain *pd) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; em_table = em_table_dup(pd); if (!em_table) { @@ -814,7 +815,7 @@ static void em_update_workfn(struct work_struct *work) */ int em_dev_update_chip_binning(struct device *dev) { - struct em_perf_table __rcu *em_table; + struct em_perf_table *em_table; struct em_perf_domain *pd; int i, ret; From 17f08280cf89baf5e4620fc7af300082bcee7e24 Mon Sep 17 00:00:00 2001 From: Jeson Gao Date: Fri, 7 Mar 2025 13:23:49 +0000 Subject: [PATCH 10/10] PM: EM: Rework the depends on for CONFIG_ENERGY_MODEL Now not only CPUs can use energy efficiency models, but GPUs can use them as well. On the other hand, even with only one CPU, we can also use energy_model to align control in thermal. So remove the dependency on SMP and add DEVFREQ as an alternative dependency. Signed-off-by: Jeson Gao [Added missing SMP config option in DTPM_CPU dependency] Signed-off-by: Lukasz Luba Link: https://patch.msgid.link/20250307132649.4056210-1-lukasz.luba@arm.com [ rjw: Subject edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/Kconfig | 2 +- kernel/power/Kconfig | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 69ef8d081c98..03c4c796d993 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -82,7 +82,7 @@ config DTPM config DTPM_CPU bool "Add CPU power capping based on the energy model" - depends on DTPM && ENERGY_MODEL + depends on DTPM && ENERGY_MODEL && SMP help This enables support for CPU power limitation based on energy model. diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index ca947ed32e3d..54a623680019 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -380,8 +380,7 @@ config CPU_PM config ENERGY_MODEL bool "Energy Model for devices with DVFS (CPUs, GPUs, etc)" - depends on SMP - depends on CPU_FREQ + depends on CPU_FREQ || PM_DEVFREQ help Several subsystems (thermal and/or the task scheduler for example) can leverage information about the energy consumed by devices to