From dba428a678c7263afce06b1f765efa0e054278e2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 Sep 2023 12:44:02 +0200 Subject: [PATCH 01/17] tick/nohz: Rename the tick handlers to more self-explanatory names The current names of the tick handlers don't tell much about what differs between them. Use names that better reflect their role and resolution. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230912104406.312185-2-frederic@kernel.org --- kernel/time/tick-sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 87015e9deacc..b66dd0ff1153 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1366,7 +1366,7 @@ void tick_nohz_idle_exit(void) /* * The nohz low res interrupt handler */ -static void tick_nohz_handler(struct clock_event_device *dev) +static void tick_nohz_lowres_handler(struct clock_event_device *dev) { struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); struct pt_regs *regs = get_irq_regs(); @@ -1412,7 +1412,7 @@ static void tick_nohz_switch_to_nohz(void) if (!tick_nohz_enabled) return; - if (tick_switch_to_oneshot(tick_nohz_handler)) + if (tick_switch_to_oneshot(tick_nohz_lowres_handler)) return; /* @@ -1475,7 +1475,7 @@ void tick_irq_enter(void) * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled.
*/ -static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) +static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer) { struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer); @@ -1524,7 +1524,7 @@ void tick_setup_sched_timer(void) * Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - ts->sched_timer.function = tick_sched_timer; + ts->sched_timer.function = tick_nohz_highres_handler; /* Get the next period (per-CPU) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); From 822deeed3a6a3fdf0cd899d3b403ecbb12fb6c7a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 Sep 2023 12:44:03 +0200 Subject: [PATCH 02/17] tick/nohz: Update obsolete comments Some comments are obsolete enough to assume that IRQ exit restarts the tick in idle or RCU is turned on at the same time as the tick, among other details. Update them and add more. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230912104406.312185-3-frederic@kernel.org --- kernel/time/tick-sched.c | 46 +++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b66dd0ff1153..95a8d1d118a2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1175,12 +1175,23 @@ void tick_nohz_idle_enter(void) } /** - * tick_nohz_irq_exit - update next tick event from interrupt exit + * tick_nohz_irq_exit - Notify the tick about IRQ exit * - * When an interrupt fires while we are idle and it doesn't cause - * a reschedule, it may still add, modify or delete a timer, enqueue - * an RCU callback, etc... - * So we need to re-calculate and reprogram the next tick event. + * A timer may have been added/modified/deleted either by the current IRQ, + * or by another place using this IRQ as a notification. 
This IRQ may have + * also updated the RCU callback list. These events may require a + * re-evaluation of the next tick. Depending on the context: + * + * 1) If the CPU is idle and no resched is pending, just proceed with idle + * time accounting. The next tick will be re-evaluated on the next idle + * loop iteration. + * + * 2) If the CPU is nohz_full: + * + * 2.1) If there is any tick dependency, restart the tick if stopped. + * + * 2.2) If there is no tick dependency, (re-)evaluate the next tick and + * stop/update it accordingly. */ void tick_nohz_irq_exit(void) { @@ -1330,11 +1341,20 @@ static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now) } /** - * tick_nohz_idle_exit - restart the idle tick from the idle task + * tick_nohz_idle_exit - Update the tick upon idle task exit + * + * When the idle task exits, update the tick depending on the + * following situations: + * + * 1) If the CPU is not in nohz_full mode (most cases), then + * restart the tick. + * + * 2) If the CPU is in nohz_full mode (corner case): + * 2.1) If the tick can be kept stopped (no tick dependencies) + * then re-eavaluate the next tick and try to keep it stopped + * as long as possible. + * 2.2) If the tick has dependencies, restart the tick. * - * Restart the idle tick when the CPU is woken up from idle - * This also exit the RCU extended quiescent state. The CPU - * can use RCU again after this function is called. */ void tick_nohz_idle_exit(void) { @@ -1364,7 +1384,13 @@ void tick_nohz_idle_exit(void) } /* - * The nohz low res interrupt handler + * In low-resolution mode, the tick handler must be implemented directly + * at the clockevent level. hrtimer can't be used instead because its + * infrastructure actually relies on the tick itself as a backend in + * low-resolution mode (see hrtimer_run_queues()). + * + * This low-resolution handler still makes use of some hrtimer APIs meanwhile + * for commodity with expiration calculation and forwarding. 
*/ static void tick_nohz_lowres_handler(struct clock_event_device *dev) { From 4f7f4409af289715f44685f250e380ce2cbffc7e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 Sep 2023 12:44:04 +0200 Subject: [PATCH 03/17] tick/nohz: Don't shutdown the lowres tick from itself In lowres dynticks mode, just like in highres dynticks mode, when there is no tick to program in the future, the tick eventually gets deactivated either: * From the idle loop if in idle mode. * From the IRQ exit if in full dynticks mode. Therefore there is no need to deactivate it from the tick itself. This just brings more overhead in the idle tick path for no reason. Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20230912104406.312185-4-frederic@kernel.org --- kernel/time/tick-sched.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 95a8d1d118a2..8e9a9dcf60d5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1403,18 +1403,16 @@ static void tick_nohz_lowres_handler(struct clock_event_device *dev) tick_sched_do_timer(ts, now); tick_sched_handle(ts, regs); - if (unlikely(ts->tick_stopped)) { - /* - * The clockevent device is not reprogrammed, so change the - * clock event device to ONESHOT_STOPPED to avoid spurious - * interrupts on devices which might not be truly one shot. - */ - tick_program_event(KTIME_MAX, 1); - return; + /* + * In dynticks mode, tick reprogram is deferred: + * - to the idle task if in dynticks-idle + * - to IRQ exit if in full-dynticks.
+ */ + if (likely(!ts->tick_stopped)) { + hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); + tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } - hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); - tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); } static inline void tick_nohz_activate(struct tick_sched *ts, int mode) @@ -1519,7 +1517,11 @@ static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer) else ts->next_tick = 0; - /* No need to reprogram if we are in idle or full dynticks mode */ + /* + * In dynticks mode, tick reprogram is deferred: + * - to the idle task if in dynticks-idle + * - to IRQ exit if in full-dynticks. + */ if (unlikely(ts->tick_stopped)) return HRTIMER_NORESTART; From c02a427f7b64ed5b840a0720a6cee5a17a1e7e07 Mon Sep 17 00:00:00 2001 From: Xueshi Hu Date: Tue, 12 Sep 2023 12:44:05 +0200 Subject: [PATCH 04/17] tick/nohz: Remove unused tick_nohz_idle_stop_tick_protected() All the callers have been removed since commit 336f560a8917 ("x86/xen: don't let xen_pv_play_dead() return") Signed-off-by: Xueshi Hu Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230912104406.312185-5-frederic@kernel.org --- include/linux/tick.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 9459fef5b857..716d17f31c45 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -140,14 +140,6 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -static inline void tick_nohz_idle_stop_tick_protected(void) -{ - local_irq_disable(); - tick_nohz_idle_stop_tick(); - local_irq_enable(); -} - #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } @@ -170,8
+162,6 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } - -static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL From 6c774377359923e4bb46c6f26381edd9189389ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Sep 2023 11:07:01 +0200 Subject: [PATCH 05/17] tick/nohz: Update comments some more Inspired by recent enhancements to comments in kernel/time/tick-sched.c, go through the entire file and fix/unify its comments: - Fix over a dozen typos, spelling mistakes & cases of bad grammar. - Re-phrase sentences that I needed to read three times to understand. [ I used the following arbitrary rule-of-thumb: - if I had to read a comment twice, it was usually my fault, - if I had to read it a third time, it was the comment's fault. ] - Comma updates: - Add commas where needed - Remove commas where not needed - In cases where a comma is optional, choose one variant and try to standardize it over similar sentences in the file. - Standardize on standalone 'NOHZ' spelling in free-flowing comments: s/nohz/NOHZ s/no idle tick/NOHZ Still keep 'dynticks' as a popular synonym. - Standardize on referring to variable names within free-flowing comments with the "'var'" nomenclature, and function names as "function_name()". - Standardize on '64-bit' and '32-bit': s/32bit/32-bit s/64bit/64-bit - Standardize on 'IRQ work': s/irq work/IRQ work - A few other tidyups I probably missed to list. No change in functionality intended - other than one small change to a syslog output string. 
Signed-off-by: Ingo Molnar Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/ZRVCNeMcSQcXS36N@gmail.com --- kernel/time/tick-sched.c | 150 +++++++++++++++++++-------------------- 1 file changed, 74 insertions(+), 76 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 8e9a9dcf60d5..be77b021e5d6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -4,7 +4,7 @@ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner * - * No idle tick implementation for low and high resolution timers + * NOHZ implementation for low and high resolution timers * * Started by: Thomas Gleixner and Ingo Molnar */ @@ -45,7 +45,7 @@ struct tick_sched *tick_get_tick_sched(int cpu) #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) /* - * The time, when the last jiffy update happened. Write access must hold + * The time when the last jiffy update happened. Write access must hold * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a * consistent view of jiffies and last_jiffies_update. */ @@ -60,13 +60,13 @@ static void tick_do_update_jiffies64(ktime_t now) ktime_t delta, nextp; /* - * 64bit can do a quick check without holding jiffies lock and + * 64-bit can do a quick check without holding the jiffies lock and * without looking at the sequence count. The smp_load_acquire() * pairs with the update done later in this function. * - * 32bit cannot do that because the store of tick_next_period - * consists of two 32bit stores and the first store could move it - * to a random point in the future. + * 32-bit cannot do that because the store of 'tick_next_period' + * consists of two 32-bit stores, and the first store could be + * moved by the CPU to a random point in the future. 
*/ if (IS_ENABLED(CONFIG_64BIT)) { if (ktime_before(now, smp_load_acquire(&tick_next_period))) @@ -75,7 +75,7 @@ static void tick_do_update_jiffies64(ktime_t now) unsigned int seq; /* - * Avoid contention on jiffies_lock and protect the quick + * Avoid contention on 'jiffies_lock' and protect the quick * check with the sequence count. */ do { @@ -90,7 +90,7 @@ static void tick_do_update_jiffies64(ktime_t now) /* Quick check failed, i.e. update is required. */ raw_spin_lock(&jiffies_lock); /* - * Reevaluate with the lock held. Another CPU might have done the + * Re-evaluate with the lock held. Another CPU might have done the * update already. */ if (ktime_before(now, tick_next_period)) { @@ -114,25 +114,23 @@ static void tick_do_update_jiffies64(ktime_t now) TICK_NSEC); } - /* Advance jiffies to complete the jiffies_seq protected job */ + /* Advance jiffies to complete the 'jiffies_seq' protected job */ jiffies_64 += ticks; - /* - * Keep the tick_next_period variable up to date. - */ + /* Keep the tick_next_period variable up to date */ nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC); if (IS_ENABLED(CONFIG_64BIT)) { /* * Pairs with smp_load_acquire() in the lockless quick - * check above and ensures that the update to jiffies_64 is - * not reordered vs. the store to tick_next_period, neither + * check above, and ensures that the update to 'jiffies_64' is + * not reordered vs. the store to 'tick_next_period', neither * by the compiler nor by the CPU. */ smp_store_release(&tick_next_period, nextp); } else { /* - * A plain store is good enough on 32bit as the quick check + * A plain store is good enough on 32-bit, as the quick check * above is protected by the sequence count. */ tick_next_period = nextp; @@ -140,7 +138,7 @@ static void tick_do_update_jiffies64(ktime_t now) /* * Release the sequence count. 
calc_global_load() below is not - * protected by it, but jiffies_lock needs to be held to prevent + * protected by it, but 'jiffies_lock' needs to be held to prevent * concurrent invocations. */ write_seqcount_end(&jiffies_seq); @@ -160,7 +158,8 @@ static ktime_t tick_init_jiffy_update(void) raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); - /* Did we start the jiffies update yet ? */ + + /* Have we started the jiffies update yet ? */ if (last_jiffies_update == 0) { u32 rem; @@ -175,8 +174,10 @@ static ktime_t tick_init_jiffy_update(void) last_jiffies_update = tick_next_period; } period = last_jiffies_update; + write_seqcount_end(&jiffies_seq); raw_spin_unlock(&jiffies_lock); + return period; } @@ -192,10 +193,10 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) * concurrency: This happens only when the CPU in charge went * into a long sleep. If two CPUs happen to assign themselves to * this duty, then the jiffies update is still serialized by - * jiffies_lock. + * 'jiffies_lock'. * * If nohz_full is enabled, this should not happen because the - * tick_do_timer_cpu never relinquishes. + * 'tick_do_timer_cpu' CPU never relinquishes. */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL @@ -205,12 +206,12 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) } #endif - /* Check, if the jiffies need an update */ + /* Check if jiffies need an update */ if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now); /* - * If jiffies update stalled for too long (timekeeper in stop_machine() + * If the jiffies update stalled for too long (timekeeper in stop_machine() * or VMEXIT'ed for several msecs), force an update. */ if (ts->last_tick_jiffies != jiffies) { @@ -234,10 +235,10 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long - * time. 
This happens on complete idle SMP systems while + * time. This happens on completely idle SMP systems while * waiting on the login prompt. We also increment the "start of * idle" jiffy stamp so the idle accounting adjustment we do - * when we go busy again does not account too much ticks. + * when we go busy again does not account too many ticks. */ if (ts->tick_stopped) { touch_softlockup_watchdog_sched(); @@ -362,7 +363,7 @@ static void tick_nohz_kick_task(struct task_struct *tsk) /* * If the task is not running, run_posix_cpu_timers() - * has nothing to elapse, IPI can then be spared. + * has nothing to elapse, and an IPI can then be optimized out. * * activate_task() STORE p->tick_dep_mask * STORE p->on_rq @@ -425,7 +426,7 @@ static void tick_nohz_dep_set_all(atomic_t *dep, /* * Set a global tick dependency. Used by perf events that rely on freq and - * by unstable clock. + * unstable clocks. */ void tick_nohz_dep_set(enum tick_dep_bits bit) { @@ -439,7 +440,7 @@ void tick_nohz_dep_clear(enum tick_dep_bits bit) /* * Set per-CPU tick dependency. Used by scheduler and perf events in order to - * manage events throttling. + * manage event-throttling. */ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { @@ -455,7 +456,7 @@ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) if (cpu == smp_processor_id()) { tick_nohz_full_kick(); } else { - /* Remote irq work not NMI-safe */ + /* Remote IRQ work not NMI-safe */ if (!WARN_ON_ONCE(in_nmi())) tick_nohz_full_kick_cpu(cpu); } @@ -473,7 +474,7 @@ void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu); /* - * Set a per-task tick dependency. RCU need this. Also posix CPU timers + * Set a per-task tick dependency. RCU needs this. Also posix CPU timers * in order to elapse per task timers. 
*/ void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) @@ -546,7 +547,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { /* - * The tick_do_timer_cpu CPU handles housekeeping duty (unbound + * The 'tick_do_timer_cpu' CPU handles housekeeping duty (unbound * timers, workqueues, timekeeping, ...) on behalf of full dynticks * CPUs. It must remain online when nohz full is enabled. */ @@ -568,12 +569,12 @@ void __init tick_nohz_init(void) return; /* - * Full dynticks uses irq work to drive the tick rescheduling on safe - * locking contexts. But then we need irq work to raise its own - * interrupts to avoid circular dependency on the tick + * Full dynticks uses IRQ work to drive the tick rescheduling on safe + * locking contexts. But then we need IRQ work to raise its own + * interrupts to avoid circular dependency on the tick. */ if (!arch_irq_work_has_interrupt()) { - pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n"); + pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support IRQ work self-IPIs\n"); cpumask_clear(tick_nohz_full_mask); tick_nohz_full_running = false; return; @@ -643,7 +644,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu) * In case the sched_tick was stopped on this CPU, we have to check if jiffies * must be updated. Otherwise an interrupt handler could use a stale jiffy * value. We do this unconditionally on any CPU, as we don't know whether the - * CPU, which has the update task assigned is in a long sleep. + * CPU, which has the update task assigned, is in a long sleep. */ static void tick_nohz_update_jiffies(ktime_t now) { @@ -726,7 +727,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime, * counters if NULL. * * Return the cumulative idle time (since boot) for a given - * CPU, in microseconds. Note this is partially broken due to + * CPU, in microseconds. 
Note that this is partially broken due to * the counter of iowait tasks that can be remotely updated without * any synchronization. Therefore it is possible to observe backward * values within two consecutive reads. @@ -787,7 +788,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) } /* - * Reset to make sure next tick stop doesn't get fooled by past + * Reset to make sure the next tick stop doesn't get fooled by past * cached clock deadline. */ ts->next_tick = 0; @@ -816,11 +817,11 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) /* * Keep the periodic tick, when RCU, architecture or irq_work * requests it. - * Aside of that check whether the local timer softirq is - * pending. If so its a bad idea to call get_next_timer_interrupt() + * Aside of that, check whether the local timer softirq is + * pending. If so, its a bad idea to call get_next_timer_interrupt(), * because there is an already expired timer, so it will request * immediate expiry, which rearms the hardware timer with a - * minimal delta which brings us back to this place + * minimal delta, which brings us back to this place * immediately. Lather, rinse and repeat... */ if (rcu_needs_cpu() || arch_needs_cpu() || @@ -861,7 +862,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) /* * If this CPU is the one which had the do_timer() duty last, we limit - * the sleep time to the timekeeping max_deferment value. + * the sleep time to the timekeeping 'max_deferment' value. * Otherwise we can sleep as long as we want. */ delta = timekeeping_max_deferment(); @@ -895,8 +896,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) * If this CPU is the one which updates jiffies, then give up * the assignment and let it be taken by the CPU which runs * the tick timer next, which might be this CPU as well. If we - * don't drop this here the jiffies might be stale and - * do_timer() never invoked. 
Keep track of the fact that it + * don't drop this here, the jiffies might be stale and + * do_timer() never gets invoked. Keep track of the fact that it * was the one which had the do_timer() duty last. */ if (cpu == tick_do_timer_cpu) { @@ -906,7 +907,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) ts->do_timer_last = 0; } - /* Skip reprogram of event if its not changed */ + /* Skip reprogram of event if it's not changed */ if (ts->tick_stopped && (expires == ts->next_tick)) { /* Sanity check: make sure clockevent is actually programmed */ if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) @@ -919,11 +920,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) } /* - * nohz_stop_sched_tick can be called several times before - * the nohz_restart_sched_tick is called. This happens when + * nohz_stop_sched_tick() can be called several times before + * nohz_restart_sched_tick() is called. This happens when * interrupts arrive which do not cause a reschedule. In the * first call we save the current tick time, so we can restart - * the scheduler tick in nohz_restart_sched_tick. + * the scheduler tick in nohz_restart_sched_tick(). */ if (!ts->tick_stopped) { calc_load_nohz_start(); @@ -985,9 +986,8 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) calc_load_nohz_stop(); touch_softlockup_watchdog_sched(); - /* - * Cancel the scheduled timer and restore the tick - */ + + /* Cancel the scheduled timer and restore the tick: */ ts->tick_stopped = 0; tick_nohz_restart(ts, now); } @@ -1019,11 +1019,11 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) /* * A pending softirq outside an IRQ (or softirq disabled section) context * should be waiting for ksoftirqd to handle it. Therefore we shouldn't - * reach here due to the need_resched() early check in can_stop_idle_tick(). + * reach this code due to the need_resched() early check in can_stop_idle_tick(). 
* * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, - * triggering the below since wakep_softirqd() is ignored. + * triggering the code below, since wakep_softirqd() is ignored. * */ static bool report_idle_softirq(void) @@ -1044,7 +1044,7 @@ static bool report_idle_softirq(void) if (ratelimit >= 10) return false; - /* On RT, softirqs handling may be waiting on some lock */ + /* On RT, softirq handling may be waiting on some lock */ if (local_bh_blocked()) return false; @@ -1061,8 +1061,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) * If this CPU is offline and it is the one which updates * jiffies, then give up the assignment and let it be taken by * the CPU which runs the tick timer next. If we don't drop - * this here the jiffies might be stale and do_timer() never - * invoked. + * this here, the jiffies might be stale and do_timer() never + * gets invoked. */ if (unlikely(!cpu_online(cpu))) { if (cpu == tick_do_timer_cpu) @@ -1219,7 +1219,7 @@ bool tick_nohz_idle_got_tick(void) /** * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer - * or the tick, whatever that expires first. Note that, if the tick has been + * or the tick, whichever expires first. Note that, if the tick has been * stopped, it returns the next hrtimer. * * Called from power state control code with interrupts disabled @@ -1263,7 +1263,7 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) return *delta_next; /* - * If the next highres timer to expire is earlier than next_event, the + * If the next highres timer to expire is earlier than 'next_event', the * idle governor needs to know that. */ next_event = min_t(u64, next_event, @@ -1307,9 +1307,9 @@ static void tick_nohz_account_idle_time(struct tick_sched *ts, if (vtime_accounting_enabled_this_cpu()) return; /* - * We stopped the tick in idle. 
Update process times would miss the - * time we slept as update_process_times does only a 1 tick - * accounting. Enforce that this is accounted to idle ! + * We stopped the tick in idle. update_process_times() would miss the + * time we slept, as it does only a 1 tick accounting. + * Enforce that this is accounted to idle ! */ ticks = jiffies - ts->idle_jiffies; /* @@ -1351,7 +1351,7 @@ static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now) * * 2) If the CPU is in nohz_full mode (corner case): * 2.1) If the tick can be kept stopped (no tick dependencies) - * then re-eavaluate the next tick and try to keep it stopped + * then re-evaluate the next tick and try to keep it stopped * as long as possible. * 2.2) If the tick has dependencies, restart the tick. * @@ -1385,12 +1385,12 @@ void tick_nohz_idle_exit(void) /* * In low-resolution mode, the tick handler must be implemented directly - * at the clockevent level. hrtimer can't be used instead because its + * at the clockevent level. hrtimer can't be used instead, because its * infrastructure actually relies on the tick itself as a backend in * low-resolution mode (see hrtimer_run_queues()). * * This low-resolution handler still makes use of some hrtimer APIs meanwhile - * for commodity with expiration calculation and forwarding. + * for convenience with expiration calculation and forwarding. */ static void tick_nohz_lowres_handler(struct clock_event_device *dev) { @@ -1426,7 +1426,7 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode) } /** - * tick_nohz_switch_to_nohz - switch to nohz mode + * tick_nohz_switch_to_nohz - switch to NOHZ mode */ static void tick_nohz_switch_to_nohz(void) { @@ -1440,8 +1440,8 @@ static void tick_nohz_switch_to_nohz(void) return; /* - * Recycle the hrtimer in ts, so we can share the - * hrtimer_forward with the highres code. + * Recycle the hrtimer in 'ts', so we can share the + * hrtimer_forward_now() function with the highres code. 
*/ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); /* Get the next period */ @@ -1464,7 +1464,7 @@ static inline void tick_nohz_irq_enter(void) if (ts->idle_active) tick_nohz_stop_idle(ts, now); /* - * If all CPUs are idle. We may need to update a stale jiffies value. + * If all CPUs are idle we may need to update a stale jiffies value. * Note nohz_full is a special case: a timekeeper is guaranteed to stay * alive but it might be busy looping with interrupts disabled in some * rare case (typically stop machine). So we must make sure we have a @@ -1483,7 +1483,7 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { } #endif /* CONFIG_NO_HZ_COMMON */ /* - * Called from irq_enter to notify about the possible interruption of idle() + * Called from irq_enter() to notify about the possible interruption of idle() */ void tick_irq_enter(void) { @@ -1509,8 +1509,8 @@ static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer) tick_sched_do_timer(ts, now); /* - * Do not call, when we are not in irq context and have - * no valid regs pointer + * Do not call when we are not in IRQ context and have + * no valid 'regs' pointer */ if (regs) tick_sched_handle(ts, regs); @@ -1548,16 +1548,14 @@ void tick_setup_sched_timer(void) struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now = ktime_get(); - /* - * Emulate tick processing via per-CPU hrtimers: - */ + /* Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ts->sched_timer.function = tick_nohz_highres_handler; /* Get the next period (per-CPU) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); - /* Offset the tick to avert jiffies_lock contention. */ + /* Offset the tick to avert 'jiffies_lock' contention. 
*/ if (sched_skew_tick) { u64 offset = TICK_NSEC >> 1; do_div(offset, num_possible_cpus()); @@ -1607,10 +1605,10 @@ void tick_oneshot_notify(void) } /* - * Check, if a change happened, which makes oneshot possible. + * Check if a change happened, which makes oneshot possible. * - * Called cyclic from the hrtimer softirq (driven by the timer - * softirq) allow_nohz signals, that we can switch into low-res nohz + * Called cyclically from the hrtimer softirq (driven by the timer + * softirq). 'allow_nohz' signals that we can switch into low-res NOHZ * mode, because high resolution timers are disabled (either compile * or runtime). Called with interrupts disabled. */ From a0fddaa0b5a587cc8d185f8802fe7e48493c43ed Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 15 Sep 2023 08:22:37 -0700 Subject: [PATCH 06/17] rtc: Add API function to return alarm time bound by hardware limit Add rtc_bound_alarmtime() to return the requested alarm timeout bound by the maximum alarm timeout that is supported by a given RTC. Signed-off-by: Guenter Roeck Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230915152238.1144706-2-linux@roeck-us.net --- include/linux/rtc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 4c0bcbeb1f00..5f8e438a0312 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -225,6 +225,23 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +/** + * rtc_bound_alarmtime() - Return alarm time bound by rtc limit + * @rtc: Pointer to rtc device structure + * @requested: Requested alarm timeout + * + * Return: Alarm timeout bound by maximum alarm time supported by rtc.
+ */ +static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc, + ktime_t requested) +{ + if (rtc->alarm_offset_max && + rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested)) + return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC); + + return requested; +} + #define devm_rtc_register_device(device) \ __devm_rtc_register_device(THIS_MODULE, device) From 8ceea12d183cf29f28072dede218a04eda2a789c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 15 Sep 2023 08:22:38 -0700 Subject: [PATCH 07/17] alarmtimer: Use maximum alarm time for suspend Some userspace applications use timerfd_create() to request wakeups after a long period of time. For example, a backup application may request a wakeup once per week. This is perfectly fine as long as the system does not try to suspend. However, if the system tries to suspend and the system's RTC does not support the required alarm timeout, the suspend operation will fail with an error such as rtc_cmos 00:01: Alarms can be up to one day in the future PM: dpm_run_callback(): platform_pm_suspend+0x0/0x4a returns -22 alarmtimer alarmtimer.4.auto: platform_pm_suspend+0x0/0x4a returned -22 after 117 usecs PM: Device alarmtimer.4.auto failed to suspend: error -22 This results in a refusal to suspend the system, causing substantial battery drain on affected systems. To fix the problem, use the maximum alarm time offset as reported by RTC drivers to set the maximum alarm time. While this may result in early wakeups from suspend, it is still much better than not suspending at all. Standardize system behavior if the requested alarm timeout is larger than the alarm timeout supported by the rtc chip. Currently, in this situation, the RTC driver will do one of the following: - It may return an error. - It may limit the alarm timeout to the maximum supported by the rtc chip. - It may mask the timeout by the maximum alarm timeout supported by the RTC chip (i.e. 
a requested timeout of 1 day + 1 minute may result in a 1 minute timeout). With this in place, if the RTC driver reports the maximum alarm timeout supported by the RTC chip, the system will always limit the alarm timeout to the maximum supported by the RTC chip. Signed-off-by: Guenter Roeck Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/r/20230915152238.1144706-3-linux@roeck-us.net --- kernel/time/alarmtimer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 8d9f13d847f0..4657cb8e8b1f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -290,6 +290,17 @@ static int alarmtimer_suspend(struct device *dev) rtc_timer_cancel(rtc, &rtctimer); rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); + + /* + * If the RTC alarm timer only supports a limited time offset, set the + * alarm time to the maximum supported value. + * The system may wake up earlier (possibly much earlier) than expected + * when the alarmtimer runs. This is the best the kernel can do if + * the alarmtimer exceeds the time that the rtc device can be programmed + * for. + */ + min = rtc_bound_alarmtime(rtc, min); + now = ktime_add(now, min); /* Set alarm, if in the past reject suspend briefly to handle */ From b7a8f1f7a8a25e09aaefebb6251a77f44cda638b Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 27 Jul 2023 09:18:44 +0100 Subject: [PATCH 08/17] dt-bindings: timer: renesas,rz-mtu3: Fix overflow/underflow interrupt names As per R01UH0914EJ0130 Rev.1.30 HW manual, the MTU3 overflow/underflow interrupt names start with 'tci' instead of 'tgi'.
Fix this documentation issue by replacing below overflow/underflow interrupt names: - tgiv0->tciv0 - tgiv1->tciv1 - tgiu1->tciu1 - tgiv2->tciv2 - tgiu2->tciu2 - tgiv3->tciv3 - tgiv4->tciv4 - tgiv6->tciv6 - tgiv7->tciv7 - tgiv8->tciv8 - tgiu8->tciu8 Fixes: 0a9d6b54297e ("dt-bindings: timer: Document RZ/G2L MTU3a bindings") Cc: stable@kernel.org Signed-off-by: Biju Das Acked-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230727081848.100834-2-biju.das.jz@bp.renesas.com --- .../bindings/timer/renesas,rz-mtu3.yaml | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml index bffdab0b0185..fbac40b958dd 100644 --- a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml +++ b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml @@ -169,27 +169,27 @@ properties: - const: tgib0 - const: tgic0 - const: tgid0 - - const: tgiv0 + - const: tciv0 - const: tgie0 - const: tgif0 - const: tgia1 - const: tgib1 - - const: tgiv1 - - const: tgiu1 + - const: tciv1 + - const: tciu1 - const: tgia2 - const: tgib2 - - const: tgiv2 - - const: tgiu2 + - const: tciv2 + - const: tciu2 - const: tgia3 - const: tgib3 - const: tgic3 - const: tgid3 - - const: tgiv3 + - const: tciv3 - const: tgia4 - const: tgib4 - const: tgic4 - const: tgid4 - - const: tgiv4 + - const: tciv4 - const: tgiu5 - const: tgiv5 - const: tgiw5 @@ -197,18 +197,18 @@ properties: - const: tgib6 - const: tgic6 - const: tgid6 - - const: tgiv6 + - const: tciv6 - const: tgia7 - const: tgib7 - const: tgic7 - const: tgid7 - - const: tgiv7 + - const: tciv7 - const: tgia8 - const: tgib8 - const: tgic8 - const: tgid8 - - const: tgiv8 - - const: tgiu8 + - const: tciv8 + - const: tciu8 clocks: maxItems: 1 @@ -285,16 +285,16 @@ examples: , , ; - interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0", "tgiv0", 
"tgie0", + interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0", "tciv0", "tgie0", "tgif0", - "tgia1", "tgib1", "tgiv1", "tgiu1", - "tgia2", "tgib2", "tgiv2", "tgiu2", - "tgia3", "tgib3", "tgic3", "tgid3", "tgiv3", - "tgia4", "tgib4", "tgic4", "tgid4", "tgiv4", + "tgia1", "tgib1", "tciv1", "tciu1", + "tgia2", "tgib2", "tciv2", "tciu2", + "tgia3", "tgib3", "tgic3", "tgid3", "tciv3", + "tgia4", "tgib4", "tgic4", "tgid4", "tciv4", "tgiu5", "tgiv5", "tgiw5", - "tgia6", "tgib6", "tgic6", "tgid6", "tgiv6", - "tgia7", "tgib7", "tgic7", "tgid7", "tgiv7", - "tgia8", "tgib8", "tgic8", "tgid8", "tgiv8", "tgiu8"; + "tgia6", "tgib6", "tgic6", "tgid6", "tciv6", + "tgia7", "tgib7", "tgic7", "tgid7", "tciv7", + "tgia8", "tgib8", "tgic8", "tgid8", "tciv8", "tciu8"; clocks = <&cpg CPG_MOD R9A07G044_MTU_X_MCK_MTU3>; power-domains = <&cpg>; resets = <&cpg R9A07G044_MTU_X_PRESET_MTU3>; From b121e7881b8975cf8dc24d9bd71a5af81ed8a6b1 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 27 Jul 2023 09:18:45 +0100 Subject: [PATCH 09/17] dt-bindings: timer: renesas,rz-mtu3: Improve documentation Fix the documentation issues pointed by Pavel while backporting it to 6.1.y-cip. - Replace '32- bit'->'32-bit' - Consistently remove '.' 
at the end of line for the specifications - Replace ' (excluding MTU8)'-> '(excluding MTU8)' Reported-by: Pavel Machek Closes: https://lore.kernel.org/all/ZH79%2FUjgYg+0Ruiu@duo.ucw.cz Signed-off-by: Biju Das Acked-by: Conor Dooley Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230727081848.100834-3-biju.das.jz@bp.renesas.com --- .../bindings/timer/renesas,rz-mtu3.yaml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml index fbac40b958dd..670a2ebaacdb 100644 --- a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml +++ b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml @@ -11,8 +11,8 @@ maintainers: description: | This hardware block consists of eight 16-bit timer channels and one - 32- bit timer channel. It supports the following specifications: - - Pulse input/output: 28 lines max. + 32-bit timer channel. It supports the following specifications: + - Pulse input/output: 28 lines max - Pulse input 3 lines - Count clock 11 clocks for each channel (14 clocks for MTU0, 12 clocks for MTU2, and 10 clocks for MTU5, four clocks for MTU1-MTU2 combination @@ -23,11 +23,11 @@ description: | - Input capture function (noise filter setting available) - Counter-clearing operation - Simultaneous writing to multiple timer counters (TCNT) - (excluding MTU8). + (excluding MTU8) - Simultaneous clearing on compare match or input capture - (excluding MTU8). + (excluding MTU8) - Simultaneous input and output to registers in synchronization with - counter operations (excluding MTU8). 
+ counter operations (excluding MTU8) - Up to 12-phase PWM output in combination with synchronous operation (excluding MTU8) - [MTU0 MTU3, MTU4, MTU6, MTU7, and MTU8] @@ -40,26 +40,26 @@ description: | - [MTU3, MTU4, MTU6, and MTU7] - Through interlocked operation of MTU3/4 and MTU6/7, the positive and negative signals in six phases (12 phases in total) can be output in - complementary PWM and reset-synchronized PWM operation. + complementary PWM and reset-synchronized PWM operation - In complementary PWM mode, values can be transferred from buffer registers to temporary registers at crests and troughs of the timer- counter values or when the buffer registers (TGRD registers in MTU4 - and MTU7) are written to. - - Double-buffering selectable in complementary PWM mode. + and MTU7) are written to + - Double-buffering selectable in complementary PWM mode - [MTU3 and MTU4] - Through interlocking with MTU0, a mode for driving AC synchronous motors (brushless DC motors) by using complementary PWM output and reset-synchronized PWM output is settable and allows the selection - of two types of waveform output (chopping or level). + of two types of waveform output (chopping or level) - [MTU5] - - Capable of operation as a dead-time compensation counter. + - Capable of operation as a dead-time compensation counter - [MTU0/MTU5, MTU1, MTU2, and MTU8] - 32-bit phase counting mode specifiable by combining MTU1 and MTU2 and - through interlocked operation with MTU0/MTU5 and MTU8. + through interlocked operation with MTU0/MTU5 and MTU8 - Interrupt-skipping function - In complementary PWM mode, interrupts on crests and troughs of counter values and triggers to start conversion by the A/D converter can be - skipped. + skipped - Interrupt sources: 43 sources. 
- Buffer operation: - Automatic transfer of register data (transfer from the buffer @@ -68,9 +68,9 @@ description: | - A/D converter start triggers can be generated - A/D converter start request delaying function enables A/D converter to be started with any desired timing and to be synchronized with - PWM output. + PWM output - Low power consumption function - - The MTU3a can be placed in the module-stop state. + - The MTU3a can be placed in the module-stop state There are two phase counting modes. 16-bit phase counting mode in which MTU1 and MTU2 operate independently, and cascade connection 32-bit phase From 078a5babf2bc92eba04b8f9162e5fea7afc2749e Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 27 Jul 2023 09:18:46 +0100 Subject: [PATCH 10/17] dt-bindings: timer: renesas,rz-mtu3: Document RZ/{G2UL,Five} SoCs Add MTU3a binding documentation for Renesas RZ/{G2UL,Five} SoCs. MTU3a block is identical to one found on RZ/G2L, so no driver changes are required. The fallback compatible string "renesas,rz-mtu3" will be used on RZ/{G2UL,Five}. 
Signed-off-by: Biju Das Acked-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230727081848.100834-4-biju.das.jz@bp.renesas.com --- Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml index 670a2ebaacdb..3931054b42fb 100644 --- a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml +++ b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml @@ -109,6 +109,7 @@ properties: compatible: items: - enum: + - renesas,r9a07g043-mtu3 # RZ/{G2UL,Five} - renesas,r9a07g044-mtu3 # RZ/G2{L,LC} - renesas,r9a07g054-mtu3 # RZ/V2L - const: renesas,rz-mtu3 From 8051a993ce222a5158bccc6ac22ace9253dd71cb Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Mon, 9 Oct 2023 16:39:22 +0800 Subject: [PATCH 11/17] clocksource/drivers/timer-imx-gpt: Fix potential memory leak Fix Coverity issue CID 250382: Resource leak (RESOURCE_LEAK). Add kfree() on the error return paths.
Signed-off-by: Jacky Bai Reviewed-by: Peng Fan Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20231009083922.1942971-1-ping.bai@nxp.com --- drivers/clocksource/timer-imx-gpt.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 28ab4f1a7c71..6a878d227a13 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -434,12 +434,16 @@ static int __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type t return -ENOMEM; imxtm->base = of_iomap(np, 0); - if (!imxtm->base) - return -ENXIO; + if (!imxtm->base) { + ret = -ENXIO; + goto err_kfree; + } imxtm->irq = irq_of_parse_and_map(np, 0); - if (imxtm->irq <= 0) - return -EINVAL; + if (imxtm->irq <= 0) { + ret = -EINVAL; + goto err_kfree; + } imxtm->clk_ipg = of_clk_get_by_name(np, "ipg"); @@ -452,11 +456,15 @@ static int __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type t ret = _mxc_timer_init(imxtm); if (ret) - return ret; + goto err_kfree; initialized = 1; return 0; + +err_kfree: + kfree(imxtm); + return ret; } static int __init imx1_timer_init_dt(struct device_node *np) From 12590d4d0e331d3cb9e6b3494515cd61c8a6624e Mon Sep 17 00:00:00 2001 From: Ivaylo Dimitrov Date: Tue, 3 Oct 2023 08:50:20 +0300 Subject: [PATCH 12/17] drivers/clocksource/timer-ti-dm: Don't call clk_get_rate() in stop function clk_get_rate() might sleep, and that prevents dm-timer based PWM from being used from atomic context. Fix that by getting fclk rate in probe() and using a notifier in case rate changes. 
Fixes: af04aa856e93 ("ARM: OMAP: Move dmtimer driver out of plat-omap to drivers under clocksource") Signed-off-by: Ivaylo Dimitrov Reviewed-by: Tony Lindgren Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1696312220-11550-1-git-send-email-ivo.g.dimitrov.75@gmail.com --- drivers/clocksource/timer-ti-dm.c | 36 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index 09ab29cb7f64..5f60f6bd3386 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -140,6 +140,8 @@ struct dmtimer { struct platform_device *pdev; struct list_head node; struct notifier_block nb; + struct notifier_block fclk_nb; + unsigned long fclk_rate; }; static u32 omap_reserved_systimers; @@ -253,8 +255,7 @@ static inline void __omap_dm_timer_enable_posted(struct dmtimer *timer) timer->posted = OMAP_TIMER_POSTED; } -static inline void __omap_dm_timer_stop(struct dmtimer *timer, - unsigned long rate) +static inline void __omap_dm_timer_stop(struct dmtimer *timer) { u32 l; @@ -269,7 +270,7 @@ static inline void __omap_dm_timer_stop(struct dmtimer *timer, * Wait for functional clock period x 3.5 to make sure that * timer is stopped */ - udelay(3500000 / rate + 1); + udelay(3500000 / timer->fclk_rate + 1); #endif } @@ -348,6 +349,21 @@ static int omap_timer_context_notifier(struct notifier_block *nb, return NOTIFY_OK; } +static int omap_timer_fclk_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct clk_notifier_data *clk_data = data; + struct dmtimer *timer = container_of(nb, struct dmtimer, fclk_nb); + + switch (event) { + case POST_RATE_CHANGE: + timer->fclk_rate = clk_data->new_rate; + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + static int omap_dm_timer_reset(struct dmtimer *timer) { u32 l, timeout = 100000; @@ -754,7 +770,6 @@ static int omap_dm_timer_stop(struct omap_dm_timer *cookie) { struct 
dmtimer *timer; struct device *dev; - unsigned long rate = 0; timer = to_dmtimer(cookie); if (unlikely(!timer)) @@ -762,10 +777,7 @@ static int omap_dm_timer_stop(struct omap_dm_timer *cookie) dev = &timer->pdev->dev; - if (!timer->omap1) - rate = clk_get_rate(timer->fclk); - - __omap_dm_timer_stop(timer, rate); + __omap_dm_timer_stop(timer); pm_runtime_put_sync(dev); @@ -1124,6 +1136,14 @@ static int omap_dm_timer_probe(struct platform_device *pdev) timer->fclk = devm_clk_get(dev, "fck"); if (IS_ERR(timer->fclk)) return PTR_ERR(timer->fclk); + + timer->fclk_nb.notifier_call = omap_timer_fclk_notifier; + ret = devm_clk_notifier_register(dev, timer->fclk, + &timer->fclk_nb); + if (ret) + return ret; + + timer->fclk_rate = clk_get_rate(timer->fclk); } else { timer->fclk = ERR_PTR(-ENODEV); } From fd73c011a123a66d88998f356c920d33d87524cf Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 31 Aug 2023 12:14:14 +0800 Subject: [PATCH 13/17] clocksource/drivers/sun5i: Remove surplus dev_err() when using platform_get_irq() There is no need to call the dev_err() function directly to print a custom message when handling an error from either the platform_get_irq() or platform_get_irq_byname() functions as both are going to display an appropriate error message in case of a failure. 
./drivers/clocksource/timer-sun5i.c:260:2-9: line 260 is redundant because platform_get_irq() already prints an error Signed-off-by: Yang Li Acked-by: Jernej Skrabec Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230831041414.66434-1-yang.lee@linux.alibaba.com --- drivers/clocksource/timer-sun5i.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 69fee3540d37..0d229a9058da 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -256,10 +256,8 @@ static int sun5i_timer_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(dev, "Can't get IRQ\n"); + if (irq < 0) return irq; - } clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(clk)) { From d7f546c751e8c873302331af6a203ee61f894e9d Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Wed, 27 Sep 2023 22:30:15 +0530 Subject: [PATCH 14/17] clocksource/timer-riscv: ACPI: Add timer_cannot_wakeup_cpu The timer capability to wakeup the cpu irrespective of its idle state is provided by the flag in RHCT. Update the timer code to set this flag. 
Signed-off-by: Sunil V L Reviewed-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230927170015.295232-5-sunilvl@ventanamicro.com --- drivers/clocksource/timer-riscv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index da3071b387eb..50198657230e 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -212,6 +212,10 @@ TIMER_OF_DECLARE(riscv_timer, "riscv", riscv_timer_init_dt); #ifdef CONFIG_ACPI static int __init riscv_timer_acpi_init(struct acpi_table_header *table) { + struct acpi_table_rhct *rhct = (struct acpi_table_rhct *)table; + + riscv_timer_cannot_wake_cpu = rhct->flags & ACPI_RHCT_TIMER_CANNOT_WAKEUP_CPU; + return riscv_timer_init_common(); } From 6d3bc4c02d59996d1d3180d8ed409a9d7d5900e0 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Sat, 7 Oct 2023 18:17:13 +0200 Subject: [PATCH 15/17] clocksource/drivers/timer-atmel-tcb: Fix initialization on SAM9 hardware On SAM9 hardware two cascaded 16 bit timers are used to form a 32 bit high resolution timer that is used as scheduler clock when the kernel has been configured that way (CONFIG_ATMEL_CLOCKSOURCE_TCB). The driver initially triggers a reset-to-zero of the two timers but this reset is only performed on the next rising clock. For the first timer this is ok - it will be in the next 60ns (16MHz clock). For the chained second timer this will only happen after the first timer overflows, i.e. after 2^16 clocks (~4ms with a 16MHz clock). In other words, the scheduler clock resets to 0 after the first 2^16 clock cycles. It looks like the scheduler does not like this and behaves wrongly over its lifetime, e.g. some tasks are scheduled with a long delay. Why that is, and whether there are additional requirements for this behaviour, has not been analysed further.
There is a simple fix for resetting the second timer as well when the first timer is reset: set the ATMEL_TC_ASWTRG_SET bit in the Channel Mode register (CMR) of the first timer. This will also raise the TIOA line (clock input of the second timer) when a software trigger or SYNC is issued. Signed-off-by: Ronald Wahl Acked-by: Alexandre Belloni Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20231007161803.31342-1-rwahl@gmx.de --- drivers/clocksource/timer-atmel-tcb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c index 27af17c99590..2a90c92a9182 100644 --- a/drivers/clocksource/timer-atmel-tcb.c +++ b/drivers/clocksource/timer-atmel-tcb.c @@ -315,6 +315,7 @@ static void __init tcb_setup_dual_chan(struct atmel_tc *tc, int mck_divisor_idx) writel(mck_divisor_idx /* likely divide-by-8 */ | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP /* free-run */ + | ATMEL_TC_ASWTRG_SET /* TIOA0 rises at software trigger */ | ATMEL_TC_ACPA_SET /* TIOA0 rises at 0 */ | ATMEL_TC_ACPC_CLEAR, /* (duty cycle 50%) */ tcaddr + ATMEL_TC_REG(0, CMR)); From 81824f7c8fb0485a5007bf0e60718afdecdef60c Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Fri, 15 Sep 2023 11:10:53 +0300 Subject: [PATCH 16/17] dt-bindings: timers: Add Cirrus EP93xx Add device tree bindings for the Cirrus Logic EP93xx timer block used in these SoCs.
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Nikita Shubin Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230915-ep93xx-v4-11-a1d779dcec10@maquefel.me --- .../bindings/timer/cirrus,ep9301-timer.yaml | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml diff --git a/Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml b/Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml new file mode 100644 index 000000000000..e463e11e259d --- /dev/null +++ b/Documentation/devicetree/bindings/timer/cirrus,ep9301-timer.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/timer/cirrus,ep9301-timer.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic EP93xx timer + +maintainers: + - Alexander Sverdlin + - Nikita Shubin + +properties: + compatible: + oneOf: + - const: cirrus,ep9301-timer + - items: + - enum: + - cirrus,ep9302-timer + - cirrus,ep9307-timer + - cirrus,ep9312-timer + - cirrus,ep9315-timer + - const: cirrus,ep9301-timer + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + timer@80810000 { + compatible = "cirrus,ep9301-timer"; + reg = <0x80810000 0x100>; + interrupt-parent = <&vic1>; + interrupts = <19>; + }; +... From c28ca80ba3b531a79402d61046aef83272f86b08 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Fri, 15 Sep 2023 11:10:54 +0300 Subject: [PATCH 17/17] clocksource: ep93xx: Add driver for Cirrus Logic EP93xx Rewrite EP93xx timer driver located in arch/arm/mach-ep93xx/timer-ep93xx.c trying to do everything the device tree way: - Make every IO-access relative to a base address and dynamic so we can do a dynamic ioremap and get going. - Find register range and interrupt from the device tree. 
Reviewed-by: Linus Walleij Tested-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230915-ep93xx-v4-12-a1d779dcec10@maquefel.me --- drivers/clocksource/Kconfig | 11 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-ep93xx.c | 190 +++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 drivers/clocksource/timer-ep93xx.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 0ba0dc4ecf06..34faa0320ece 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -732,4 +732,15 @@ config GOLDFISH_TIMER help Support for the timer/counter of goldfish-rtc +config EP93XX_TIMER + bool "Cirrus Logic ep93xx timer driver" if COMPILE_TEST + depends on ARCH_EP93XX + depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM + select CLKSRC_MMIO + select TIMER_OF + help + Enables support for the Cirrus Logic timer block + EP93XX. + endmenu diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 368c3461dab8..4bb856e4df55 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -89,3 +89,4 @@ obj-$(CONFIG_MSC313E_TIMER) += timer-msc313e.o obj-$(CONFIG_GOLDFISH_TIMER) += timer-goldfish.o obj-$(CONFIG_GXP_TIMER) += timer-gxp.o obj-$(CONFIG_CLKSRC_LOONGSON1_PWM) += timer-loongson1-pwm.o +obj-$(CONFIG_EP93XX_TIMER) += timer-ep93xx.o diff --git a/drivers/clocksource/timer-ep93xx.c b/drivers/clocksource/timer-ep93xx.c new file mode 100644 index 000000000000..bc0ca6e12334 --- /dev/null +++ b/drivers/clocksource/timer-ep93xx.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cirrus Logic EP93xx timer driver. 
+ * Copyright (C) 2021 Nikita Shubin + * + * Based on a rewrite of arch/arm/mach-ep93xx/timer.c: + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/************************************************************************* + * Timer handling for EP93xx + ************************************************************************* + * The ep93xx has four internal timers. Timers 1, 2 (both 16 bit) and + * 3 (32 bit) count down at 508 kHz, are self-reloading, and can generate + * an interrupt on underflow. Timer 4 (40 bit) counts down at 983.04 kHz, + * is free-running, and can't generate interrupts. + * + * The 508 kHz timers are ideal for use for the timer interrupt, as the + * most common values of HZ divide 508 kHz nicely. We pick the 32 bit + * timer (timer 3) to get as long sleep intervals as possible when using + * CONFIG_NO_HZ. + * + * The higher clock rate of timer 4 makes it a better choice than the + * other timers for use as clock source and for sched_clock(), providing + * a stable 40 bit time base. + ************************************************************************* + */ + +#define EP93XX_TIMER1_LOAD 0x00 +#define EP93XX_TIMER1_VALUE 0x04 +#define EP93XX_TIMER1_CONTROL 0x08 +#define EP93XX_TIMER123_CONTROL_ENABLE BIT(7) +#define EP93XX_TIMER123_CONTROL_MODE BIT(6) +#define EP93XX_TIMER123_CONTROL_CLKSEL BIT(3) +#define EP93XX_TIMER1_CLEAR 0x0c +#define EP93XX_TIMER2_LOAD 0x20 +#define EP93XX_TIMER2_VALUE 0x24 +#define EP93XX_TIMER2_CONTROL 0x28 +#define EP93XX_TIMER2_CLEAR 0x2c +/* + * This read-only register contains the low word of the time stamp debug timer + * ( Timer4). When this register is read, the high byte of the Timer4 counter is + * saved in the Timer4ValueHigh register. 
+ */ +#define EP93XX_TIMER4_VALUE_LOW 0x60 +#define EP93XX_TIMER4_VALUE_HIGH 0x64 +#define EP93XX_TIMER4_VALUE_HIGH_ENABLE BIT(8) +#define EP93XX_TIMER3_LOAD 0x80 +#define EP93XX_TIMER3_VALUE 0x84 +#define EP93XX_TIMER3_CONTROL 0x88 +#define EP93XX_TIMER3_CLEAR 0x8c + +#define EP93XX_TIMER123_RATE 508469 +#define EP93XX_TIMER4_RATE 983040 + +struct ep93xx_tcu { + void __iomem *base; +}; + +static struct ep93xx_tcu *ep93xx_tcu; + +static u64 ep93xx_clocksource_read(struct clocksource *c) +{ + struct ep93xx_tcu *tcu = ep93xx_tcu; + + return lo_hi_readq(tcu->base + EP93XX_TIMER4_VALUE_LOW) & GENMASK_ULL(39, 0); +} + +static u64 notrace ep93xx_read_sched_clock(void) +{ + return ep93xx_clocksource_read(NULL); +} + +static int ep93xx_clkevt_set_next_event(unsigned long next, + struct clock_event_device *evt) +{ + struct ep93xx_tcu *tcu = ep93xx_tcu; + /* Default mode: periodic, off, 508 kHz */ + u32 tmode = EP93XX_TIMER123_CONTROL_MODE | + EP93XX_TIMER123_CONTROL_CLKSEL; + + /* Clear timer */ + writel(tmode, tcu->base + EP93XX_TIMER3_CONTROL); + + /* Set next event */ + writel(next, tcu->base + EP93XX_TIMER3_LOAD); + writel(tmode | EP93XX_TIMER123_CONTROL_ENABLE, + tcu->base + EP93XX_TIMER3_CONTROL); + return 0; +} + +static int ep93xx_clkevt_shutdown(struct clock_event_device *evt) +{ + struct ep93xx_tcu *tcu = ep93xx_tcu; + /* Disable timer */ + writel(0, tcu->base + EP93XX_TIMER3_CONTROL); + + return 0; +} + +static struct clock_event_device ep93xx_clockevent = { + .name = "timer1", + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_state_shutdown = ep93xx_clkevt_shutdown, + .set_state_oneshot = ep93xx_clkevt_shutdown, + .tick_resume = ep93xx_clkevt_shutdown, + .set_next_event = ep93xx_clkevt_set_next_event, + .rating = 300, +}; + +static irqreturn_t ep93xx_timer_interrupt(int irq, void *dev_id) +{ + struct ep93xx_tcu *tcu = ep93xx_tcu; + struct clock_event_device *evt = dev_id; + + /* Writing any value clears the timer interrupt */ + writel(1, tcu->base + 
EP93XX_TIMER3_CLEAR); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static int __init ep93xx_timer_of_init(struct device_node *np) +{ + int irq; + unsigned long flags = IRQF_TIMER | IRQF_IRQPOLL; + struct ep93xx_tcu *tcu; + int ret; + + tcu = kzalloc(sizeof(*tcu), GFP_KERNEL); + if (!tcu) + return -ENOMEM; + + tcu->base = of_iomap(np, 0); + if (!tcu->base) { + pr_err("Can't remap registers\n"); + ret = -ENXIO; + goto out_free; + } + + ep93xx_tcu = tcu; + + irq = irq_of_parse_and_map(np, 0); + if (irq == 0) + irq = -EINVAL; + if (irq < 0) { + pr_err("EP93XX Timer Can't parse IRQ %d", irq); + goto out_free; + } + + /* Enable and register clocksource and sched_clock on timer 4 */ + writel(EP93XX_TIMER4_VALUE_HIGH_ENABLE, + tcu->base + EP93XX_TIMER4_VALUE_HIGH); + clocksource_mmio_init(NULL, "timer4", + EP93XX_TIMER4_RATE, 200, 40, + ep93xx_clocksource_read); + sched_clock_register(ep93xx_read_sched_clock, 40, + EP93XX_TIMER4_RATE); + + /* Set up clockevent on timer 3 */ + if (request_irq(irq, ep93xx_timer_interrupt, flags, "ep93xx timer", + &ep93xx_clockevent)) + pr_err("Failed to request irq %d (ep93xx timer)\n", irq); + + clockevents_config_and_register(&ep93xx_clockevent, + EP93XX_TIMER123_RATE, + 1, + UINT_MAX); + + return 0; + +out_free: + kfree(tcu); + return ret; +} +TIMER_OF_DECLARE(ep93xx_timer, "cirrus,ep9301-timer", ep93xx_timer_of_init);