From c41c0ebfa1e0eb40cfb11846a7a579eb8d9dfb5f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 26 Sep 2025 14:49:10 +0200 Subject: [PATCH 1/3] printk/nbcon: Block printk kthreads when any CPU is in an emergency context In emergency contexts, printk() tries to flush messages directly even on nbcon consoles. And it is allowed to takeover the console ownership and interrupt the printk kthread in the middle of a message. Only one takeover and one repeated message should be enough in most situations. The first emergency message flushes the backlog and printk kthreads get to sleep. Next emergency messages are flushed directly and printk() does not wake up the kthreads. However, the one takeover is not guaranteed. Any printk() in normal context on another CPU could wake up the kthreads. Or a new emergency message might be added before the kthreads get to sleep. Note that the interrupted .write_thread() callbacks usually have to call nbcon_reacquire_nobuf() and restore the original device setting before checking for pending messages. The risk of the repeated takeovers will be even bigger because __nbcon_atomic_flush_pending_con is going to release the console ownership after each emitted record. It will be needed to prevent hardlockup reports on other CPUs which are busy waiting for the context ownership, for example, by nbcon_reacquire_nobuf() or __uart_port_nbcon_acquire(). The repeated takeovers break the output, for example: [ 5042.650211][ T2220] Call Trace: [ 5042.6511 ** replaying previous printk message ** [ 5042.651192][ T2220] [ 5042.652160][ T2220] kunit_run_ ** replaying previous printk message ** [ 5042.652160][ T2220] kunit_run_tests+0x72/0x90 [ 5042.653340][ T22 ** replaying previous printk message ** [ 5042.653340][ T2220] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5042.654628][ T2220] ? stack_trace_save+0x4d/0x70 [ 5042.6553 ** replaying previous printk message ** [ 5042.655394][ T2220] ? srso_alias_return_thunk+0x5/0xfbef5 [ 5042.656713][ T2220] ? save_trace+0x5b/0x180 A more robust solution is to block the printk kthread entirely whenever *any* CPU enters an emergency context. This ensures that critical messages can be flushed without contention from the normal, non-atomic printing path. Link: https://lore.kernel.org/all/aNQO-zl3k1l4ENfy@pathway.suse.cz Reviewed-by: Andrew Murray Reviewed-by: John Ogness Link: https://patch.msgid.link/20250926124912.243464-2-pmladek@suse.com [pmladek@suse.com: Added changes proposed by John Ogness] Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 558ef3177976..dda336ffabdd 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -118,6 +118,9 @@ * from scratch. */ +/* Counter of active nbcon emergency contexts. */ +static atomic_t nbcon_cpu_emergency_cnt = ATOMIC_INIT(0); + /** * nbcon_state_set - Helper function to set the console state * @con: Console to update @@ -1163,6 +1166,16 @@ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_contex if (kthread_should_stop()) return true; + /* + * Block the kthread when the system is in an emergency or panic mode. + * It increases the chance that these contexts would be able to show + * the messages directly. And it reduces the risk of interrupted writes + * where the context with a higher priority takes over the nbcon console + * ownership in the middle of a message. + */ + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + return false; + cookie = console_srcu_read_lock(); flags = console_srcu_read_flags(con); @@ -1214,6 +1227,13 @@ static int nbcon_kthread_func(void *__console) if (kthread_should_stop()) return 0; + /* + * Block the kthread when the system is in an emergency or panic + * mode. See nbcon_kthread_should_wakeup() for more details. + */ + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + goto wait_for_event; + backlog = false; /* @@ -1655,6 +1675,8 @@ void nbcon_cpu_emergency_enter(void) preempt_disable(); + atomic_inc(&nbcon_cpu_emergency_cnt); + cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); (*cpu_emergency_nesting)++; } @@ -1669,10 +1691,24 @@ void nbcon_cpu_emergency_exit(void) unsigned int *cpu_emergency_nesting; cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting(); - if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0)) (*cpu_emergency_nesting)--; + /* + * Wake up kthreads because there might be some pending messages + * added by other CPUs with normal priority since the last flush + * in the emergency context. + */ + if (!WARN_ON_ONCE(atomic_read(&nbcon_cpu_emergency_cnt) == 0)) { + if (atomic_dec_return(&nbcon_cpu_emergency_cnt) == 0) { + struct console_flush_type ft; + + printk_get_console_flush_type(&ft); + if (ft.nbcon_offload) + nbcon_kthreads_wake(); + } + } + preempt_enable(); } From 4c3ba0d5925685d27490078bf2d54ff9c0a0e67b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 26 Sep 2025 14:49:11 +0200 Subject: [PATCH 2/3] printk/nbcon/panic: Allow printk kthread to sleep when the system is in panic The printk kthread might be running when there is a panic in progress. But it is not able to acquire the console ownership any longer. Prevent the desperate attempts to acquire the ownership and allow sleeping in panic. It would make it behave the same as when there is any CPU in an emergency context. Reviewed-by: Andrew Murray Reviewed-by: John Ogness Link: https://patch.msgid.link/20250926124912.243464-3-pmladek@suse.com [pmladek@suse.com: Rebased on top of 6.18-rc1 (panic_in_progress() moved to panic.c)] Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index dda336ffabdd..2fd2f906e134 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1173,7 +1173,8 @@ static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_contex * where the context with a higher priority takes over the nbcon console * ownership in the middle of a message. */ - if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) || + unlikely(panic_in_progress())) return false; cookie = console_srcu_read_lock(); @@ -1231,7 +1232,8 @@ static int nbcon_kthread_func(void *__console) * Block the kthread when the system is in an emergency or panic * mode. See nbcon_kthread_should_wakeup() for more details. */ - if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt))) + if (unlikely(atomic_read(&nbcon_cpu_emergency_cnt)) || + unlikely(panic_in_progress())) goto wait_for_event; backlog = false; From d5d399efff65773ed82ddaf6c11a0fcfdb5eb029 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 26 Sep 2025 14:49:12 +0200 Subject: [PATCH 3/3] printk/nbcon: Release nbcon consoles ownership in atomic flush after each emitted record printk() tries to flush messages with NBCON_PRIO_EMERGENCY on nbcon consoles immediately. It might take seconds to flush all pending lines on slow serial consoles. Note that there might be hundreds of messages, for example: [ 3.771531][ T1] pci 0000:3e:08.1: [8086:324 ** replaying previous printk message ** [ 3.771531][ T1] pci 0000:3e:08.1: [8086:3246] type 00 class 0x088000 PCIe Root Complex Integrated Endpoint [ ... more than 2000 lines, about 200kB messages ... ] [ 3.837752][ T1] pci 0000:20:01.0: Adding to iommu group 18 [ 3.837851][ T ** replaying previous printk message ** [ 3.837851][ T1] pci 0000:20:03.0: Adding to iommu group 19 [ 3.837946][ T1] pci 0000:20:05.0: Adding to iommu group 20 [ ... more than 500 messages for iommu groups 21-590 ...] [ 3.912932][ T1] pci 0000:f6:00.1: Adding to iommu group 591 [ 3.913070][ T1] pci 0000:f6:00.2: Adding to iommu group 592 [ 3.913243][ T1] DMAR: Intel(R) Virtualization Technology for Directed I/O [ 3.913245][ T1] PCI-DMA: Using software bounce buffering for IO (SWIOTLB) [ 3.913245][ T1] software IO TLB: mapped [mem 0x000000004f000000-0x0000000053000000] (64MB) [ 3.913324][ T1] RAPL PMU: API unit is 2^-32 Joules, 3 fixed counters, 655360 ms ovfl timer [ 3.913325][ T1] RAPL PMU: hw unit of domain package 2^-14 Joules [ 3.913326][ T1] RAPL PMU: hw unit of domain dram 2^-14 Joules [ 3.913327][ T1] RAPL PMU: hw unit of domain psys 2^-0 Joules [ 3.933486][ T1] ------------[ cut here ]------------ [ 3.933488][ T1] WARNING: CPU: 2 PID: 1 at arch/x86/events/intel/uncore.c:1156 uncore_pci_pmu_register+0x15e/0x180 [ 3.930291][ C0] watchdog: Watchdog detected hard LOCKUP on cpu 0 [ 3.930291][ C0] Kernel panic - not syncing: Hard LOCKUP [...] [ 3.930291][ C0] CPU: 0 UID: 0 PID: 18 Comm: pr/ttyS0 Not tainted... [...] [ 3.930291][ C0] RIP: 0010:nbcon_reacquire_nobuf+0x11/0x50 [ 3.930291][ C0] Call Trace: [...] [ 3.930291][ C0] [ 3.930291][ C0] serial8250_console_write+0x16d/0x5c0 [ 3.930291][ C0] nbcon_emit_next_record+0x22c/0x250 [ 3.930291][ C0] nbcon_emit_one+0x93/0xe0 [ 3.930291][ C0] nbcon_kthread_func+0x13c/0x1c0 The are visible two takeovers of the console ownership: - The 1st one is triggered by the "WARNING: CPU: 2 PID: 1 at arch/x86/..." line printed with NBCON_PRIO_EMERGENCY. - The 2nd one is triggered by the "Kernel panic - not syncing: Hard LOCKUP" line printed with NBCON_PRIO_PANIC. There are more than 2500 lines, at about 240kB, emitted between the takeover and the 1st "WARNING" line in the emergency context. This amount of pending messages had to be flushed by nbcon_atomic_flush_pending() when WARN() printed its first line. The atomic flush was holding the nbcon console context for too long so that it triggered hard lockup on the CPU running the printk kthread "pr/ttyS0". The kthread needed to reacquire the console ownership for restoring the original serial port state in serial8250_console_write(). Prevent the hardlockup by releasing the nbcon console ownership after each emitted record. Note that __nbcon_atomic_flush_pending_con() used to hold the console ownership all the time because it blocked the printk kthread. Otherwise the kthread tried to flush the messages in parallel which caused repeated takeovers and more replayed messages. It is not longer a problem because the repeated takeovers are blocked by the counter of emergency contexts, see nbcon_cpu_emergency_cnt. Link: https://lore.kernel.org/all/aNQO-zl3k1l4ENfy@pathway.suse.cz Reviewed-by: Andrew Murray Reviewed-by: John Ogness Link: https://patch.msgid.link/20250926124912.243464-4-pmladek@suse.com Signed-off-by: Petr Mladek --- kernel/printk/nbcon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 2fd2f906e134..3060360fb357 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -1527,10 +1527,10 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, ctxt->prio = nbcon_get_default_prio(); ctxt->allow_unsafe_takeover = allow_unsafe_takeover; - if (!nbcon_context_try_acquire(ctxt, false)) - return -EPERM; - while (nbcon_seq_read(con) < stop_seq) { + if (!nbcon_context_try_acquire(ctxt, false)) + return -EPERM; + /* * nbcon_emit_next_record() returns false when the console was * handed over or taken over. In both cases the context is no @@ -1539,6 +1539,8 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, if (!nbcon_emit_next_record(&wctxt, true)) return -EAGAIN; + nbcon_context_release(ctxt); + if (!ctxt->backlog) { /* Are there reserved but not yet finalized records? */ if (nbcon_seq_read(con) < stop_seq) @@ -1547,7 +1549,6 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq, } } - nbcon_context_release(ctxt); return err; }