From c6f4e552e1eae4a5726230254108213b085e1ae3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Nov 2025 19:07:41 -0800 Subject: [PATCH 01/18] rcutorture: Add a textbook-style trivial preemptible RCU This commit adds a trivial textbook implementation of preemptible RCU to rcutorture ("torture_type=trivial-preempt"), similar in spirit to the existing "torture_type=trivial" textbook implementation of non-preemptible RCU. Neither trivial RCU implementation has any value for production use; both are intended only to keep Paul honest in his introductory writings and presentations. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Joel Fernandes --- include/linux/sched.h | 4 ++ kernel/rcu/Kconfig.debug | 11 ++++ kernel/rcu/rcu.h | 4 ++ kernel/rcu/rcutorture.c | 57 ++++++++++++++++++- kernel/rcu/update.c | 22 +++++++ .../rcutorture/configs/rcu/TRIVIAL-PREEMPT | 12 ++++ .../configs/rcu/TRIVIAL-PREEMPT.boot | 3 + 7 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a5d3dbc9cdf..ffb2ad9716f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -949,6 +949,10 @@ struct task_struct { struct srcu_ctr __percpu *trc_reader_scp; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU + int rcu_trivial_preempt_nesting; +#endif /* #ifdef CONFIG_TRIVIAL_PREEMPT_RCU */ + struct sched_info sched_info; struct list_head tasks; diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 625d75392647..e078e988773d 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -228,4 +228,15 @@ config RCU_DYNTICKS_TORTURE This has no value for production and is only for testing.
+config TRIVIAL_PREEMPT_RCU + bool "Textbook trivial preemptible RCU in rcutorture" + depends on RCU_EXPERT && RCU_TORTURE_TEST + default n + help + This option enables a textbook preemptible RCU that is + implemented in rcutorture. Its sole purpose is to validate + code used in books, papers, and presentations. + + This has no value for production and is only for testing. + endmenu # "RCU Debugging" diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 9b10b57b79ad..fa6d30ce73d1 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -691,4 +691,8 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v); static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; } #endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER) +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU +void synchronize_rcu_trivial_preempt(void); +#endif // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 8a9282a0245c..3c272413666b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1061,6 +1061,61 @@ static struct rcu_torture_ops trivial_ops = { .name = "trivial" }; +#ifdef CONFIG_TRIVIAL_PREEMPT_RCU + +/* + * Definitions for trivial CONFIG_PREEMPT=y torture testing. This + * implementation does not work well with large numbers of tasks or with + * long-term preemption. Either or both get you RCU CPU stall warnings. 
+ */ + +static void rcu_sync_torture_init_trivial_preempt(void) +{ + rcu_sync_torture_init(); + if (WARN_ONCE(onoff_interval || shuffle_interval, "%s: Non-zero onoff_interval (%d) or shuffle_interval (%d) breaks trivial RCU, resetting to zero", __func__, onoff_interval, shuffle_interval)) { + onoff_interval = 0; + shuffle_interval = 0; + } +} + +static int rcu_torture_read_lock_trivial_preempt(void) +{ + struct task_struct *t = current; + + WRITE_ONCE(t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting + 1); + smp_mb(); + return 0; +} + +static void rcu_torture_read_unlock_trivial_preempt(int idx) +{ + struct task_struct *t = current; + + smp_store_release(&t->rcu_trivial_preempt_nesting, t->rcu_trivial_preempt_nesting - 1); +} + +static struct rcu_torture_ops trivial_preempt_ops = { + .ttype = RCU_TRIVIAL_FLAVOR, + .init = rcu_sync_torture_init_trivial_preempt, + .readlock = rcu_torture_read_lock_trivial_preempt, + .read_delay = rcu_read_delay, // just reuse rcu's version. + .readunlock = rcu_torture_read_unlock_trivial_preempt, + .readlock_held = torture_readlock_not_held, + .get_gp_seq = rcu_no_completed, + .sync = synchronize_rcu_trivial_preempt, + .exp_sync = synchronize_rcu_trivial_preempt, + .irq_capable = 0, // In theory it should be, but let's keep it trivial. 
+ .name = "trivial-preempt" +}; + +#define TRIVIAL_PREEMPT_OPS &trivial_preempt_ops, + +#else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + +#define TRIVIAL_PREEMPT_OPS + +#endif // #else // #ifdef CONFIG_TRIVIAL_PREEMPT_RCU + #ifdef CONFIG_TASKS_RCU /* @@ -4449,7 +4504,7 @@ rcu_torture_init(void) static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS - &trivial_ops, + &trivial_ops, TRIVIAL_PREEMPT_OPS }; if (!torture_init_begin(torture_type, verbose)) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index d98a5c38e19c..b62735a67884 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -538,6 +538,28 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool do EXPORT_SYMBOL_GPL(torture_sched_setaffinity); #endif +#if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU) +// Trivial and stupid grace-period wait. Defined here so that lockdep +// kernels can find tasklist_lock. +void synchronize_rcu_trivial_preempt(void) +{ + struct task_struct *g; + struct task_struct *t; + + smp_mb(); // Order prior accesses before grace-period start. + rcu_read_lock(); // Protect task list. + for_each_process_thread(g, t) { + if (t == current) + continue; // Don't deadlock on ourselves! + // Order later rcu_read_lock() on other tasks after QS. 
+ while (smp_load_acquire(&t->rcu_trivial_preempt_nesting)) + continue; + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_trivial_preempt); +#endif // #if IS_ENABLED(CONFIG_TRIVIAL_PREEMPT_RCU) + int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful) EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers); diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT new file mode 100644 index 000000000000..8230b14bfe68 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT @@ -0,0 +1,12 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_TRIVIAL_PREEMPT_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot new file mode 100644 index 000000000000..299cd3a12df6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL-PREEMPT.boot @@ -0,0 +1,3 @@ +rcutorture.torture_type=trivial-preempt +rcutorture.onoff_interval=0 +rcutorture.shuffle_interval=0 From 69642000bbc57c2e42708d7186b3ba0deca53f6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 28 Dec 2025 16:27:18 -0800 Subject: [PATCH 02/18] kvm-check-branches.sh: Remove in favor of kvm-series.sh The kvm-series.sh script is an order-of-magnitude optimization of kvm-check-branches.sh, so remove the old script. Signed-off-by: Paul E. 
McKenney Signed-off-by: Joel Fernandes --- .../rcutorture/bin/kvm-check-branches.sh | 102 ------------------ 1 file changed, 102 deletions(-) delete mode 100755 tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh deleted file mode 100755 index ed0ec7f0927e..000000000000 --- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0+ -# -# Run a group of kvm.sh tests on the specified commits. This currently -# unconditionally does three-minute runs on each scenario in CFLIST, -# taking advantage of all available CPUs and trusting the "make" utility. -# In the short term, adjustments can be made by editing this script and -# CFLIST. If some adjustments appear to have ongoing value, this script -# might grow some command-line arguments. -# -# Usage: kvm-check-branches.sh commit1 commit2..commit3 commit4 ... -# -# This script considers its arguments one at a time. If more elaborate -# specification of commits is needed, please use "git rev-list" to -# produce something that this simple script can understand. The reason -# for retaining the simplicity is that it allows the user to more easily -# see which commit came from which branch. -# -# This script creates a yyyy.mm.dd-hh.mm.ss-group entry in the "res" -# directory. The calls to kvm.sh create the usual entries, but this script -# moves them under the yyyy.mm.dd-hh.mm.ss-group entry, each in its own -# directory numbered in run order, that is, "0001", "0002", and so on. -# For successful runs, the large build artifacts are removed. Doing this -# reduces the disk space required by about two orders of magnitude for -# successful runs. -# -# Copyright (C) Facebook, 2020 -# -# Authors: Paul E. McKenney - -if ! git status > /dev/null 2>&1 -then - echo '!!!' 
This script needs to run in a git archive. 1>&2 - echo '!!!' Giving up. 1>&2 - exit 1 -fi - -# Remember where we started so that we can get back at the end. -curcommit="`git status | head -1 | awk '{ print $NF }'`" - -nfail=0 -ntry=0 -resdir="tools/testing/selftests/rcutorture/res" -ds="`date +%Y.%m.%d-%H.%M.%S`-group" -if ! test -e $resdir -then - mkdir $resdir || : -fi -mkdir $resdir/$ds -echo Results directory: $resdir/$ds - -RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE -PATH=${RCUTORTURE}/bin:$PATH; export PATH -. functions.sh -echo Using all `identify_qemu_vcpus` CPUs. - -# Each pass through this loop does one command-line argument. -for gitbr in $@ -do - echo ' --- git branch ' $gitbr - - # Each pass through this loop tests one commit. - for i in `git rev-list "$gitbr"` - do - ntry=`expr $ntry + 1` - idir=`awk -v ntry="$ntry" 'END { printf "%04d", ntry; }' < /dev/null` - echo ' --- commit ' $i from branch $gitbr - date - mkdir $resdir/$ds/$idir - echo $gitbr > $resdir/$ds/$idir/gitbr - echo $i >> $resdir/$ds/$idir/gitbr - - # Test the specified commit. - git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1 - echo git checkout return code: $? "(Commit $ntry: $i)" - kvm.sh --allcpus --duration 3 --trust-make --datestamp "$ds/$idir" > $resdir/$ds/$idir/kvm.sh.out 2>&1 - ret=$? - echo kvm.sh return code $ret for commit $i from branch $gitbr - echo Run results: $resdir/$ds/$idir - if test "$ret" -ne 0 - then - # Failure, so leave all evidence intact. - nfail=`expr $nfail + 1` - else - # Success, so remove large files to save about 1GB. - ( cd $resdir/$ds/$idir/$rrd; rm -f */vmlinux */bzImage */System.map */Module.symvers ) - fi - done -done -date - -# Go back to the original commit. -git checkout "$curcommit" - -if test $nfail -ne 0 -then - echo '!!! ' $nfail failures in $ntry 'runs!!!' - exit 1 -else - echo No failures in $ntry runs. - exit 0 -fi From 08d5cade666dc4a0f8e9a43a738796a92336f276 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 19 Jan 2026 20:33:48 -0800 Subject: [PATCH 03/18] torture: Make hangs more visible in torture.sh output This commit labels "QEMU killed" lines so that they will be picked up by torture.sh processing. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 2 +- tools/testing/selftests/rcutorture/bin/parse-console.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index de65d77b47ff..4791774b8485 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -49,7 +49,7 @@ do then if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -eq 137 then - echo QEMU killed + echo "Summary: Potential hang (QEMU killed)" fi configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1 if grep -q '^CONFIG_KCSAN=y$' $i/ConfigFragment.input diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 21e6ba3615f6..be1e943ca4d5 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -113,7 +113,6 @@ then then print_warning $title `cat $T.seq` fi - exit 2 fi fi | tee -a $file.diags From 6778178c3b07c926d8a9af515c5af73f6bdebacf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Jan 2026 17:50:48 -0800 Subject: [PATCH 04/18] torture: Print informative message for test without recheck file If a type of torture test lacks a recheck file, a bash diagnostic is printed, which looks like a torture-test bug. This commit gets rid of this false positive by explicitly checking for the file, invoking it if it exists, otherwise printing an informative non-diagnostic message. Signed-off-by: Paul E. 
McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 4791774b8485..63bbbdd5f4ef 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -39,7 +39,12 @@ do X*) ;; *) - kvm-recheck-${TORTURE_SUITE}.sh $i + if test -f tools/testing/selftests/rcutorture/bin/kvm-recheck-${TORTURE_SUITE}.sh + then + kvm-recheck-${TORTURE_SUITE}.sh $i + else + echo No kvm-recheck-${TORTURE_SUITE}.sh, so no ${TORTURE_SUITE}-specific analysis. + fi esac if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137 then From df6e6ae18fe776e1ae5dfa8e5104980df608912d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Jan 2026 12:42:24 -0800 Subject: [PATCH 05/18] rcutorture: Fix numeric "test" comparison in srcu_lockdep.sh This commit switches from "-eq" to "=" to handle the non-numeric comparisons in srcu_lockdep.sh. While in the area, adjust SRCU flavor to improve coverage. Signed-off-by: Paul E. 
McKenney Signed-off-by: Joel Fernandes --- tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index 208be7d09a61..4e98c697def4 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -50,7 +50,7 @@ do do err= val=$((d*1000+t*10+c)) - tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 + tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x4" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 ret=$? mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config @@ -92,12 +92,12 @@ do nerrs=$((nerrs+1)) err=1 fi - if test "$val" -eq 0xf && test "$ret" -eq 0 + if test "$val" = 0xf && test "$ret" -eq 0 then err=1 echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" fi - if test "$val" -eq 0x1 && test "$ret" -ne 0 + if test "$val" = 0x1 && test "$ret" -ne 0 then err=1 echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" From b0c8dd5097aaa7bfc70c8933de6be0dcdc995592 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Feb 2026 13:43:32 -0800 Subject: [PATCH 06/18] refscale: Ditch ref_scale_shutdown in favor of torture_shutdown_init() The torture_shutdown_init() function spawns a shutdown kthread in a manner very similar to that implemented by ref_scale_shutdown(). This commit therefore re-implements ref_scale_shutdown in terms of torture_shutdown_init(). 
The initial draft of this patch was generated by version 2.1.16 of the Claude AI/LLM, but trained and configured for use by my employer, and prompted to refer to Linux-kernel source code. This initial draft failed to provide a forward reference to ref_scale_cleanup(), passed zero to torture_shutdown_init() for an unwelcome insta-shutdown, and failed to pass the kvm.sh --duration argument in as a refscale module parameter. On the other hand, it did catch the need to NULL main_task on the post-test self-shutdown code path, which I might well have forgotten to do. This version of the patch fixes those problems, and in fact very little of the initial draft remains. Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/refscale.c | 51 +++++-------------- .../configs/refscale/ver_functions.sh | 2 +- 2 files changed, 15 insertions(+), 38 deletions(-) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index c158b6a947cd..a2d9d75d88a1 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -92,15 +92,9 @@ torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); torture_param(int, nruns, 30, "Number of experiments to run."); // Reader delay in nanoseconds, 0 for no delay. torture_param(int, readdelay, 0, "Read-side delay in nanoseconds."); - -#ifdef MODULE -# define REFSCALE_SHUTDOWN 0 -#else -# define REFSCALE_SHUTDOWN 1 -#endif - -torture_param(bool, shutdown, REFSCALE_SHUTDOWN, - "Shutdown at end of scalability tests."); +// Maximum shutdown delay in seconds, or zero for no shutdown. 
+torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_REF_SCALE_TEST) * 300, + "Shutdown at end of scalability tests or at specified timeout (s)."); struct reader_task { struct task_struct *task; @@ -109,12 +103,8 @@ struct reader_task { u64 last_duration_ns; }; -static struct task_struct *shutdown_task; -static wait_queue_head_t shutdown_wq; - static struct task_struct *main_task; static wait_queue_head_t main_wq; -static int shutdown_start; static struct reader_task *reader_tasks; @@ -1357,6 +1347,8 @@ static u64 process_durations(int n) return sum; } +static void ref_scale_cleanup(void); + // The main_func is the main orchestrator, it performs a bunch of // experiments. For every experiment, it orders all the readers // involved to start and waits for them to finish the experiment. It @@ -1443,9 +1435,10 @@ static int main_func(void *arg) oom_exit: // This will shutdown everything including us. - if (shutdown) { - shutdown_start = 1; - wake_up(&shutdown_wq); + if (shutdown_secs) { + main_task = NULL; // Avoid self-kill deadlock. + ref_scale_cleanup(); + kernel_power_off(); } // Wait for torture to stop us @@ -1463,8 +1456,8 @@ static void ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag) { pr_alert("%s" SCALE_FLAG - "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, - verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); + "--- %s: verbose=%d verbose_batched=%d shutdown_secs=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, + verbose, verbose_batched, shutdown_secs, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); } static void @@ -1497,19 +1490,6 @@ ref_scale_cleanup(void) torture_cleanup_end(); } -// Shutdown kthread. Just waits to be awakened, then shuts down system.
-static int -ref_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, shutdown_start); - - smp_mb(); // Wake before output. - ref_scale_cleanup(); - kernel_power_off(); - - return -EINVAL; -} - static int __init ref_scale_init(void) { @@ -1553,13 +1533,10 @@ ref_scale_init(void) ref_scale_print_module_parms(cur_ops, "Start of test"); // Shutdown task - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(ref_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, ref_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } // Reader tasks (default to ~75% of online CPUs). @@ -1604,7 +1581,7 @@ ref_scale_init(void) unwind: torture_init_end(); ref_scale_cleanup(); - if (shutdown) { + if (shutdown_secs) { WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST)); kernel_power_off(); } diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh index 748465627601..219fac070af2 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh @@ -11,7 +11,7 @@ # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo refscale.shutdown=1 \ + echo refscale.shutdown_secs=$3 \ refscale.verbose=0 \ $1 } From 359cf5c942b8fce9cf2b7f3c1eb5b8186f0d9b30 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 8 Feb 2026 19:03:30 -0800 Subject: [PATCH 07/18] rcuscale: Ditch rcu_scale_shutdown in favor of torture_shutdown_init() The torture_shutdown_init() function spawns a shutdown kthread in a manner very similar to that implemented by rcu_scale_shutdown(). This commit therefore re-implements rcu_scale_shutdown() in terms of torture_shutdown_init(). 
This patch was generated by Claude given as input the patch making the same transformation of ref_scale_shutdown(). Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/rcuscale.c | 78 +++++-------------- .../configs/rcuscale/ver_functions.sh | 2 +- 2 files changed, 22 insertions(+), 58 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 4ac2b134a983..ac0b1c6b7dae 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -79,12 +79,6 @@ MODULE_AUTHOR("Paul E. McKenney "); * test-end checks, and the pair of calls through pointers. */ -#ifdef MODULE -# define RCUSCALE_SHUTDOWN 0 -#else -# define RCUSCALE_SHUTDOWN 1 -#endif - torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); @@ -92,8 +86,8 @@ torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); torture_param(int, minruntime, 0, "Minimum run time (s)"); torture_param(int, nreaders, -1, "Number of RCU reader threads"); torture_param(int, nwriters, -1, "Number of RCU updater threads"); -torture_param(bool, shutdown, RCUSCALE_SHUTDOWN, - "Shutdown at end of scalability tests."); +torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_SCALE_TEST) * 300, + "Shutdown at end of scalability tests or at specified timeout (s)."); torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable"); @@ -123,7 +117,6 @@ static int nrealreaders; static int nrealwriters; static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; -static struct task_struct *shutdown_task; static u64 **writer_durations; static bool *writer_done; @@ -132,7 +125,6 @@ static int *writer_n_durations; 
static atomic_t n_rcu_scale_reader_started; static atomic_t n_rcu_scale_writer_started; static atomic_t n_rcu_scale_writer_finished; -static wait_queue_head_t shutdown_wq; static u64 t_rcu_scale_writer_started; static u64 t_rcu_scale_writer_finished; static unsigned long b_rcu_gp_test_started; @@ -519,6 +511,8 @@ static void rcu_scale_async_cb(struct rcu_head *rhp) rcu_scale_free(wmbp); } +static void rcu_scale_cleanup(void); + /* * RCU scale writer kthread. Repeatedly does a grace period. */ @@ -622,9 +616,11 @@ rcu_scale_writer(void *arg) b_rcu_gp_test_finished = cur_ops->get_gp_seq(); } - if (shutdown) { + if (shutdown_secs) { + writer_tasks[me] = NULL; smp_mb(); /* Assign before wake. */ - wake_up(&shutdown_wq); + rcu_scale_cleanup(); + kernel_power_off(); } } } @@ -668,8 +664,8 @@ static void rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag) { pr_alert("%s" SCALE_FLAG - "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n", - scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown); + "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown_secs=%d\n", + scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown_secs); } /* @@ -722,6 +718,8 @@ static void kfree_call_rcu(struct rcu_head *rh) kfree(obj); } +static void kfree_scale_cleanup(void); + static int kfree_scale_thread(void *arg) { @@ -791,9 +789,11 @@ kfree_scale_thread(void *arg) rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started), PAGES_TO_MB(mem_begin - mem_during)); - if (shutdown) { + if (shutdown_secs) { + kfree_reader_tasks[me] = NULL; smp_mb(); /* 
Assign before wake. */ - wake_up(&shutdown_wq); + kfree_scale_cleanup(); + kernel_power_off(); } } @@ -820,22 +820,6 @@ kfree_scale_cleanup(void) torture_cleanup_end(); } -/* - * shutdown kthread. Just waits to be awakened, then shuts down system. - */ -static int -kfree_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, - atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); - - smp_mb(); /* Wake before output. */ - - kfree_scale_cleanup(); - kernel_power_off(); - return -EINVAL; -} - // Used if doing RCU-kfree'ing via call_rcu(). static unsigned long jiffies_at_lazy_cb; static struct rcu_head lazy_test1_rh; @@ -895,13 +879,10 @@ kfree_scale_init(void) kfree_nrealthreads = compute_real(kfree_nthreads); /* Start up the kthreads. */ - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(kfree_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, kfree_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n", @@ -1058,20 +1039,6 @@ rcu_scale_cleanup(void) torture_cleanup_end(); } -/* - * RCU scalability shutdown kthread. Just waits to be awakened, then shuts - * down system. - */ -static int -rcu_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); - smp_mb(); /* Wake before output. */ - rcu_scale_cleanup(); - kernel_power_off(); - return -EINVAL; -} - static int __init rcu_scale_init(void) { @@ -1121,13 +1088,10 @@ rcu_scale_init(void) /* Start up the kthreads. 
*/ - if (shutdown) { - init_waitqueue_head(&shutdown_wq); - firsterr = torture_create_kthread(rcu_scale_shutdown, NULL, - shutdown_task); + if (shutdown_secs) { + firsterr = torture_shutdown_init(shutdown_secs, rcu_scale_cleanup); if (torture_init_error(firsterr)) goto unwind; - schedule_timeout_uninterruptible(1); } reader_tasks = kzalloc_objs(reader_tasks[0], nrealreaders); if (reader_tasks == NULL) { @@ -1201,7 +1165,7 @@ rcu_scale_init(void) unwind: torture_init_end(); rcu_scale_cleanup(); - if (shutdown) { + if (shutdown_secs) { WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST)); kernel_power_off(); } diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh index 28070b43f017..b78ddc243d89 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -11,7 +11,7 @@ # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo rcuscale.shutdown=1 \ + echo rcuscale.shutdown_secs=$3 \ rcuscale.verbose=0 \ $1 } From d978d3fc0488691f3b10919594d1d7d465fa568b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 27 Jan 2026 15:24:24 -0800 Subject: [PATCH 08/18] srcu: Fix SRCU read flavor macro comments The SRCU_READ_FLAVOR_FAST and SRCU_READ_FLAVOR_FAST_UPDOWN comments need repair. The former fails to note that SRCU-fast can be used in NMI handlers, and the latter says that it goes with srcu_read_lock_fast() when it really goes with srcu_read_lock_fast_updown(). This commit therefore fixes both comments. Signed-off-by: Paul E.
McKenney Signed-off-by: Joel Fernandes --- include/linux/srcu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bb44a0bd7696..81b1938512d5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -69,8 +69,8 @@ int init_srcu_struct_fast_updown(struct srcu_struct *ssp); #define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). #define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). // 0x4 // SRCU-lite is no longer with us. -#define SRCU_READ_FLAVOR_FAST 0x4 // srcu_read_lock_fast(). -#define SRCU_READ_FLAVOR_FAST_UPDOWN 0x8 // srcu_read_lock_fast(). +#define SRCU_READ_FLAVOR_FAST 0x4 // srcu_read_lock_fast(), also NMI-safe. +#define SRCU_READ_FLAVOR_FAST_UPDOWN 0x8 // srcu_read_lock_fast_updown(). #define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ SRCU_READ_FLAVOR_FAST | SRCU_READ_FLAVOR_FAST_UPDOWN) // All of the above. From 4968907016c2a54800a67273b92b3b66245bd372 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 6 Jan 2026 10:28:10 -0800 Subject: [PATCH 09/18] srcu: Fix s/they disables/they disable/ typo in srcu_read_unlock_fast() Typo fix in srcu_read_unlock_fast() header comment. Reported-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney Reviewed-by: Mathieu Desnoyers Signed-off-by: Joel Fernandes --- include/linux/srcutree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index be76fa4fc170..fd1a9270cb9a 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -260,7 +260,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_read_unlock_fast(). 
* * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because + * critical sections either because they disable interrupts, because * they are a single instruction, or because they are read-modify-write * atomic operations, depending on the whims of the architecture. * This matters because the SRCU-fast grace-period mechanism uses either From ad6ef775cbefffd6c614dfc57429c364192b5de0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Jan 2026 16:18:30 -0800 Subject: [PATCH 10/18] rcu-tasks: Document that RCU Tasks Trace grace periods now imply RCU grace periods Now that RCU Tasks Trace is implemented in terms of SRCU-fast, the fact that each SRCU-fast grace period implies at least two RCU grace periods in turn means that each RCU Tasks Trace grace period implies at least two RCU grace periods. This commit therefore updates the documentation accordingly. Reviewed-by: Frederic Weisbecker Reported-by: Alexei Starovoitov Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- Documentation/RCU/Design/Requirements/Requirements.rst | 7 +++++++ include/linux/rcupdate.h | 9 +++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index b5cdbba3ec2e..4d886e7c7a95 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -2787,6 +2787,13 @@ which avoids the read-side memory barriers, at least for architectures that apply noinstr to kernel entry/exit code (or that build with ``CONFIG_TASKS_TRACE_RCU_NO_MB=y``. +Now that the implementation is based on SRCU-fast, a call +to synchronize_rcu_tasks_trace() implies at least one call to +synchronize_rcu(), that is, every Tasks Trace RCU grace period contains +at least one plain vanilla RCU grace period.
Should there ever +be a synchronize_rcu_tasks_trace_expedited(), this guarantee would +*not* necessarily apply to this hypothetical API member. + The tasks-trace-RCU API is also reasonably compact, consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(), rcu_read_lock_trace_held(), call_rcu_tasks_trace(), diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 04f3f86a4145..18a85c30fd4f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -208,12 +208,9 @@ static inline void exit_tasks_rcu_finish(void) { } /** * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? * - * As an accident of implementation, an RCU Tasks Trace grace period also - * acts as an RCU grace period. However, this could change at any time. - * Code relying on this accident must call this function to verify that - * this accident is still happening. - * - * You have been warned! + * Now that RCU Tasks Trace is implemented in terms of SRCU-fast, a + * call to synchronize_rcu_tasks_trace() is guaranteed to imply at least + * one call to synchronize_rcu(). */ static inline bool rcu_trace_implies_rcu_gp(void) { return true; } From 18a6770f1f9899d3ce2d54dba0bdaa5a7e2bdd24 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 5 Jan 2026 03:42:41 -0500 Subject: [PATCH 11/18] rcutorture: Add NOCB01 config for RCU_LAZY torture testing Add new rcutorture config NOCB01 that enables CONFIG_RCU_LAZY combined with CONFIG_RCU_NOCB_CPU to exercise the lazy callback code paths in the NOCB implementation. This config exercises lazy callback paths not covered by other configs, including lazy-only wake and lazy defer logic. This config is not added to CFLIST to avoid increasing the default test duration; it can be run explicitly when lazy callback testing is needed. Acked-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Tested-by: Paul E. 
McKenney Signed-off-by: Joel Fernandes --- .../selftests/rcutorture/configs/rcu/NOCB01 | 21 +++++++++++++++++++ .../rcutorture/configs/rcu/NOCB01.boot | 2 ++ 2 files changed, 23 insertions(+) create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/NOCB01 create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/NOCB01.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB01 b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01 new file mode 100644 index 000000000000..bbe6d28210ab --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01 @@ -0,0 +1,21 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_EQS_DEBUG=y +CONFIG_RCU_LAZY=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB01.boot b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01.boot new file mode 100644 index 000000000000..5130bc84c435 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB01.boot @@ -0,0 +1,2 @@ +rcupdate.rcu_self_test=1 +rcu_nocbs=all From 6c3d9ad795a212ccfdfc0359524ab0d040c58757 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 5 Jan 2026 03:42:56 -0500 Subject: [PATCH 12/18] rcutorture: Add NOCB02 config for nocb poll mode testing Add new rcutorture config NOCB02 that enables rcu_nocb_poll boot parameter combined with CONFIG_RCU_NOCB_CPU to exercise the polling mode code paths in the NOCB implementation. This config exercises poll-mode paths not covered by other configs, where callback invocation uses active polling instead of kthread wakeups. 
This config is not added to CFLIST to avoid increasing the default test duration; it can be run explicitly when poll-mode testing is needed. Acked-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Tested-by: Paul E. McKenney Signed-off-by: Joel Fernandes --- .../selftests/rcutorture/configs/rcu/NOCB02 | 20 +++++++++++++++++++ .../rcutorture/configs/rcu/NOCB02.boot | 3 +++ 2 files changed, 23 insertions(+) create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/NOCB02 create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/NOCB02.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB02 b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02 new file mode 100644 index 000000000000..4c2b8cd6d8fd --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02 @@ -0,0 +1,20 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/NOCB02.boot b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02.boot new file mode 100644 index 000000000000..c212ae299b0b --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/NOCB02.boot @@ -0,0 +1,3 @@ +rcupdate.rcu_self_test=1 +rcu_nocbs=all +rcu_nocb_poll From 3e3d7d8f3ad35deaf3f8150f66555ef54cf1754e Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 5 Jan 2026 09:19:51 +0800 Subject: [PATCH 13/18] rcu-tasks: Remove unnecessary smp_store_release() in cblist_init_generic() The cblist_init_generic() is executed during the CPU early boot phase due to commit:30ef09635b9e ("rcu-tasks: Initialize callback lists at rcu_init() 
time"), at which point only the boot CPU is online and interrupts are disabled. This commit therefore uses plain assignments in place of smp_store_release() and WRITE_ONCE() in cblist_init_generic(). Signed-off-by: Zqiang Reviewed-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Signed-off-by: Joel Fernandes --- kernel/rcu/tasks.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 2b55e6acf3c1..48f0d803c8e2 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -291,9 +291,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp) shift = ilog2(rcu_task_cpu_ids / lim); if (((rcu_task_cpu_ids - 1) >> shift) >= lim) shift++; - WRITE_ONCE(rtp->percpu_enqueue_shift, shift); - WRITE_ONCE(rtp->percpu_dequeue_lim, lim); - smp_store_release(&rtp->percpu_enqueue_lim, lim); + rtp->percpu_enqueue_shift = shift; + rtp->percpu_dequeue_lim = lim; + rtp->percpu_enqueue_lim = lim; pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d rcu_task_cpu_ids=%d.\n", rtp->name, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), From 18d01ff3b9812b785673689780bb3868c4c1e2fa Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sat, 3 Jan 2026 10:37:54 -0500 Subject: [PATCH 14/18] rcu/nocb: Consolidate rcu_nocb_cpu_offload/deoffload functions The rcu_nocb_cpu_offload() and rcu_nocb_cpu_deoffload() functions are nearly duplicates. Therefore, extract the common logic into rcu_nocb_cpu_toggle_offload() which takes an 'offload' boolean, and make both exported functions simple wrappers. This eliminates a bunch of duplicate code at the call sites, namely mutex locking, CPU hotplug locking and CPU online checks. 
Reviewed-by: Frederic Weisbecker Signed-off-by: Joel Fernandes --- kernel/rcu/tree_nocb.h | 70 +++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index b3337c7231cc..d5e4d23090e8 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1081,30 +1081,6 @@ static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp) return 0; } -int rcu_nocb_cpu_deoffload(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - int ret = 0; - - cpus_read_lock(); - mutex_lock(&rcu_state.nocb_mutex); - if (rcu_rdp_is_offloaded(rdp)) { - if (!cpu_online(cpu)) { - ret = rcu_nocb_rdp_deoffload(rdp); - if (!ret) - cpumask_clear_cpu(cpu, rcu_nocb_mask); - } else { - pr_info("NOCB: Cannot CB-deoffload online CPU %d\n", rdp->cpu); - ret = -EINVAL; - } - } - mutex_unlock(&rcu_state.nocb_mutex); - cpus_read_unlock(); - - return ret; -} -EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); - static bool rcu_nocb_rdp_offload_wait_cond(struct rcu_data *rdp) { unsigned long flags; @@ -1149,28 +1125,52 @@ static int rcu_nocb_rdp_offload(struct rcu_data *rdp) return 0; } -int rcu_nocb_cpu_offload(int cpu) +/* Common helper for CPU offload/deoffload operations. */ +static int rcu_nocb_cpu_toggle_offload(int cpu, bool offload) { struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); int ret = 0; cpus_read_lock(); mutex_lock(&rcu_state.nocb_mutex); - if (!rcu_rdp_is_offloaded(rdp)) { - if (!cpu_online(cpu)) { - ret = rcu_nocb_rdp_offload(rdp); - if (!ret) - cpumask_set_cpu(cpu, rcu_nocb_mask); - } else { - pr_info("NOCB: Cannot CB-offload online CPU %d\n", rdp->cpu); - ret = -EINVAL; - } + + /* Already in desired state, nothing to do. */ + if (rcu_rdp_is_offloaded(rdp) == offload) + goto out_unlock; + + if (cpu_online(cpu)) { + pr_info("NOCB: Cannot CB-%soffload online CPU %d\n", + offload ? 
"" : "de", rdp->cpu); + ret = -EINVAL; + goto out_unlock; } + + if (offload) { + ret = rcu_nocb_rdp_offload(rdp); + if (!ret) + cpumask_set_cpu(cpu, rcu_nocb_mask); + } else { + ret = rcu_nocb_rdp_deoffload(rdp); + if (!ret) + cpumask_clear_cpu(cpu, rcu_nocb_mask); + } + +out_unlock: mutex_unlock(&rcu_state.nocb_mutex); cpus_read_unlock(); - return ret; } + +int rcu_nocb_cpu_deoffload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, false /* de-offload */); +} +EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload); + +int rcu_nocb_cpu_offload(int cpu) +{ + return rcu_nocb_cpu_toggle_offload(cpu, true /* offload */); +} EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload); #ifdef CONFIG_RCU_LAZY From 2243517a5440caa635b945deb7915397ef39b29b Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sat, 3 Jan 2026 15:54:37 -0500 Subject: [PATCH 15/18] rcu/nocb: Extract nocb_bypass_needs_flush() to reduce duplication The bypass flush decision logic is duplicated in rcu_nocb_try_bypass() and nocb_gp_wait() with similar conditions. This commit therefore extracts the functionality into a common helper function nocb_bypass_needs_flush(), improving the code readability. A flush_faster parameter is added to control the flushing thresholds and timeouts. This design was in the original commit d1b222c6be1f ("rcu/nocb: Add bypass callback queueing") to avoid having the GP kthread aggressively flush the bypass queue. 
Reviewed-by: Frederic Weisbecker Signed-off-by: Joel Fernandes --- kernel/rcu/tree_nocb.h | 51 ++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index d5e4d23090e8..1047b30cd46b 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -378,6 +378,38 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); } +/* + * Determine if the bypass queue needs to be flushed based on time and size. 
+ * For lazy-only bypass queues, use the lazy flush timeout; otherwise flush + * based on jiffy advancement. The flush_faster controls flush aggressiveness. + */ +static bool nocb_bypass_needs_flush(struct rcu_data *rdp, long bypass_ncbs, + long lazy_ncbs, unsigned long j, + bool flush_faster) +{ + bool bypass_is_lazy; + unsigned long bypass_first; + unsigned long flush_timeout; + long qhimark_thresh; + + if (!bypass_ncbs) + return false; + + qhimark_thresh = flush_faster ? qhimark : 2 * qhimark; + if (bypass_ncbs >= qhimark_thresh) + return true; + + bypass_first = READ_ONCE(rdp->nocb_bypass_first); + bypass_is_lazy = (bypass_ncbs == lazy_ncbs); + + if (bypass_is_lazy) + flush_timeout = rcu_get_jiffies_lazy_flush(); + else + flush_timeout = flush_faster ? 0 : 1; + + return time_after(j, bypass_first + flush_timeout); +} + /* * See whether it is appropriate to use the ->nocb_bypass list in order * to control contention on ->nocb_lock. A limited number of direct @@ -404,7 +436,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long cur_gp_seq; unsigned long j = jiffies; long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); - bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); + long lazy_len = READ_ONCE(rdp->lazy_len); + bool bypass_is_lazy = (ncbs == lazy_len); lockdep_assert_irqs_disabled(); @@ -456,10 +489,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // If ->nocb_bypass has been used too long or is too full, // flush ->nocb_bypass to ->cblist. 
- if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || - (ncbs && bypass_is_lazy && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) || - ncbs >= qhimark) { + if (nocb_bypass_needs_flush(rdp, ncbs, lazy_len, j, true)) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); @@ -673,15 +703,8 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); lazy_ncbs = READ_ONCE(rdp->lazy_len); - if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) || - bypass_ncbs > 2 * qhimark)) { - flush_bypass = true; - } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && - (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || - bypass_ncbs > 2 * qhimark)) { - flush_bypass = true; - } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { + flush_bypass = nocb_bypass_needs_flush(rdp, bypass_ncbs, lazy_ncbs, j, false); + if (!flush_bypass && !bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { rcu_nocb_unlock_irqrestore(rdp, flags); continue; /* No callbacks here, try next. */ } From a18396219ba52b524d8b86bf9e2515b01c068614 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Mar 2026 15:40:39 -0800 Subject: [PATCH 16/18] torture: Avoid modulo-zero error in torture_hrtimeout_ns() Currently, all calls to torture_hrtimeout_ns() either provide a non-zero fuzzt_ns or a NULL trsp, either of which avoids taking the modulus of a zero-valued fuzzt_ns. But this code should do a better job of defending itself, so this commit explicitly checks fuzzt_ns and avoids the modulus when its value is zero. Reviewed-by: Joel Fernandes Signed-off-by: Paul E. 
McKenney Signed-off-by: Joel Fernandes --- kernel/torture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/torture.c b/kernel/torture.c index ec3370986976..62c1ac777694 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -93,7 +93,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode { ktime_t hto = baset_ns; - if (trsp) + if (trsp && fuzzt_ns) hto += torture_random(trsp) % fuzzt_ns; set_current_state(TASK_IDLE); return schedule_hrtimeout(&hto, mode); From ab875b3e179ff7ca2a982bc14f7fe810862c7594 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 17 Mar 2026 17:41:17 -0400 Subject: [PATCH 17/18] rcu: Add BOOTPARAM_RCU_STALL_PANIC Kconfig option Add a Kconfig option to set the default value of the kernel.panic_on_rcu_stall sysctl, allowing the kernel to be built with panic-on-RCU-stall enabled by default. This is useful for high-availability systems that require automatic recovery (via panic_timeout) when a CPU stall is detected, without needing userspace to configure the sysctl at boot. This follows the pattern established by BOOTPARAM_SOFTLOCKUP_PANIC and BOOTPARAM_HUNG_TASK_PANIC. The runtime sysctl can still override the Kconfig default. Reviewed-by: Paul E. McKenney Signed-off-by: Gustavo Luiz Duarte Signed-off-by: Joel Fernandes --- kernel/rcu/Kconfig.debug | 24 ++++++++++++++++++++++++ kernel/rcu/tree_stall.h | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index e078e988773d..35218ba74eb5 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -175,6 +175,30 @@ config RCU_CPU_STALL_NOTIFIER Say Y here if you want RCU CPU stall notifiers (you don't want them) Say N if you are unsure. +config BOOTPARAM_RCU_STALL_PANIC + bool "Panic (reboot) on RCU CPU stall" + depends on RCU_STALL_COMMON + default n + help + Say Y here to enable the kernel to panic when an RCU CPU stall + is detected. 
+ + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after an + RCU CPU stall has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a CPU stall must be resolved ASAP. + + The kernel.max_rcu_stall_to_panic sysctl can be used to set + a minimum number of stalls before panicking, allowing the + system to tolerate a given number of RCU CPU stalls before + triggering a panic. + + This setting can be overridden at runtime via the + kernel.panic_on_rcu_stall sysctl parameter. + + Say N if unsure. + config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index b67532cb8770..43ddabf46b5e 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -17,7 +17,7 @@ // Controlling CPU stall warnings, including delay calculation. /* panic() on RCU Stall sysctl. */ -static int sysctl_panic_on_rcu_stall __read_mostly; +static int sysctl_panic_on_rcu_stall __read_mostly = IS_ENABLED(CONFIG_BOOTPARAM_RCU_STALL_PANIC); static int sysctl_max_rcu_stall_to_panic __read_mostly; static const struct ctl_table rcu_stall_sysctl_table[] = { From 95c7d025cc8c3c6c41206e2a18332eb04878b7ef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 14 Mar 2026 06:18:48 -0700 Subject: [PATCH 18/18] rcutorture: Test call_srcu() with preemption disabled and not This commit tests invoking call_srcu() with preemption both enabled and disabled, via acquiring of pi lock. [ Joel: reword commit message. ] Signed-off-by: Paul E. 
McKenney Signed-off-by: Joel Fernandes --- kernel/rcu/rcutorture.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 3c272413666b..5f2848b828dc 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -842,7 +842,14 @@ static unsigned long srcu_torture_completed(void) static void srcu_torture_deferred_free(struct rcu_torture *rp) { + unsigned long flags; + bool lockit = jiffies & 0x1; + + if (lockit) + raw_spin_lock_irqsave(&current->pi_lock, flags); call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb); + if (lockit) + raw_spin_unlock_irqrestore(&current->pi_lock, flags); } static void srcu_torture_synchronize(void)