From aaa18ff54b97706b84306b6613630262706b1f6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Tue, 2 Jul 2024 17:24:56 -0400 Subject: [PATCH 1/3] thermal: gov_power_allocator: Return early in manage if trip_max is NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit da781936e7c3 ("thermal: gov_power_allocator: Allow binding without trip points") allowed the governor to bind even when trip_max is NULL. This allows a NULL pointer dereference to happen in the manage callback. Add an early return to prevent it, since the governor is expected to not do anything in this case. Fixes: da781936e7c3 ("thermal: gov_power_allocator: Allow binding without trip points") Signed-off-by: NĂ­colas F. R. A. Prado Link: https://patch.msgid.link/20240702-power-allocator-null-trip-max-v1-1-47a60dc55414@collabora.com Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/thermal/gov_power_allocator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 45f04a25255a..1b2345a697c5 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -759,6 +759,9 @@ static void power_allocator_manage(struct thermal_zone_device *tz) return; } + if (!params->trip_max) + return; + allocate_power(tz, params->trip_max->temperature); params->update_cdevs = true; } From a8a261774466d8691e555ea674c193bb1b09edab Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 28 Jun 2024 14:10:03 +0200 Subject: [PATCH 2/3] thermal: core: Call monitor_thermal_zone() if zone temperature is invalid Commit 202aa0d4bb53 ("thermal: core: Do not call handle_thermal_trip() if zone temperature is invalid") caused __thermal_zone_device_update() to return early if the current thermal zone temperature was invalid. This was done to avoid running handle_thermal_trip() and governor callbacks in that case which led to confusion. However, it went too far because monitor_thermal_zone() still needs to be called even when the zone temperature is invalid to ensure that it will be updated eventually in case thermal polling is enabled and the driver has no other means to notify the core of zone temperature changes (for example, it does not register an interrupt handler or ACPI notifier). Also if the .set_trips() zone callback is expected to set up monitoring interrupts for a thermal zone, it has to be provided with valid boundaries and that can only happen if the zone temperature is known. Accordingly, to ensure that __thermal_zone_device_update() will run again after a failing zone temperature check, make it call monitor_thermal_zone() regardless of whether or not the zone temperature is valid and make the latter schedule a thermal zone temperature update if the zone temperature is invalid even if polling is not enabled for the thermal zone. Fixes: 202aa0d4bb53 ("thermal: core: Do not call handle_thermal_trip() if zone temperature is invalid") Reported-by: Daniel Lezcano Tested-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2764814.mvXUDI8C0e@rjwysocki.net [ rjw: Changed THERMAL_RECHECK_DELAY_MS to 250 ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 5 ++++- drivers/thermal/thermal_core.h | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 1b0ab2790860..46418e30b4a6 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -300,6 +300,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); + else if (tz->temperature == THERMAL_TEMP_INVALID) + thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); } static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz) @@ -511,7 +513,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, update_temperature(tz); if (tz->temperature == THERMAL_TEMP_INVALID) - return; + goto monitor; __thermal_zone_set_trips(tz); @@ -533,6 +535,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, thermal_debug_update_trip_stats(tz); +monitor: monitor_thermal_zone(tz); } diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 66f67e54e0c8..94eeb4011a48 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -133,6 +133,12 @@ struct thermal_zone_device { struct thermal_trip_desc trips[] __counted_by(num_trips); }; +/* + * Default delay after a failing thermal zone temperature check before + * attempting to check it again. + */ +#define THERMAL_RECHECK_DELAY_MS 250 + /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" From 94eacc1c583dd2ba51a2158fb13285f5dc42714b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Jul 2024 17:16:00 +0200 Subject: [PATCH 3/3] thermal: core: Fix list sorting in __thermal_zone_device_update() The order in which lists are sorted in __thermal_zone_device_update() is reverse with respect to what it should be due to a mistake in thermal_trip_notify_cmp(). Fix it and observe that it is not necessary to sort the lists in different orders. They can both be sorted in ascending order if way_down_list is walked in reverse order which allows the code to be slightly more straightforward (and less prone to silly mistakes). Fixes: 7454f2c42cce ("thermal: core: Sort trip point crossing notifications by temperature") Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/12481676.O9o76ZdvQC@rjwysocki.net --- drivers/thermal/thermal_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 46418e30b4a6..ecc748d15eb7 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -484,16 +484,14 @@ static void thermal_trip_crossed(struct thermal_zone_device *tz, thermal_governor_trip_crossed(governor, tz, trip, crossed_up); } -static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a, +static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a, const struct list_head *b) { struct thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc, notify_list_node); struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc, notify_list_node); - int ret = tdb->notify_temp - tda->notify_temp; - - return ascending ? ret : -ret; + return tda->notify_temp - tdb->notify_temp; } void __thermal_zone_device_update(struct thermal_zone_device *tz, @@ -522,12 +520,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, for_each_trip_desc(tz, td) handle_thermal_trip(tz, td, &way_up_list, &way_down_list); - list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp); + list_sort(NULL, &way_up_list, thermal_trip_notify_cmp); list_for_each_entry(td, &way_up_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, true); list_sort(NULL, &way_down_list, thermal_trip_notify_cmp); - list_for_each_entry(td, &way_down_list, notify_list_node) + list_for_each_entry_reverse(td, &way_down_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, false); if (governor->manage)