From 535fdfc5a228524552ee8810c9175e877e127c27 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2025 15:52:13 +0000 Subject: [PATCH 01/11] arm64: Use load LSE atomics for the non-return per-CPU atomic operations The non-return per-CPU this_cpu_*() atomic operations are implemented as STADD/STCLR/STSET when FEAT_LSE is available. On many microarchitecture implementations, these instructions tend to be executed "far" in the interconnect or memory subsystem (unless the data is already in the L1 cache). This is in general more efficient when there is contention as it avoids bouncing cache lines between CPUs. The load atomics (e.g. LDADD without XZR as destination), OTOH, tend to be executed "near" with the data loaded into the L1 cache. STADD executed back to back as in srcu_read_{lock,unlock}*() incur an additional overhead due to the default posting behaviour on several CPU implementations. Since the per-CPU atomics are unlikely to be used concurrently on the same memory location, encourage the hardware to to execute them "near" by issuing load atomics - LDADD/LDCLR/LDSET - with the destination register unused (but not XZR). Signed-off-by: Catalin Marinas Link: https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop Reported-by: Paul E. McKenney Tested-by: Paul E. McKenney Cc: Will Deacon Reviewed-by: Palmer Dabbelt [will: Add comment and link to the discussion thread] Signed-off-by: Will Deacon --- arch/arm64/include/asm/percpu.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 9abcc8ef3087..b57b2bb00967 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -77,7 +77,7 @@ __percpu_##name##_case_##sz(void *ptr, unsigned long val) \ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ " cbnz %w[loop], 1b", \ /* LSE atomics */ \ - #op_lse "\t%" #w "[val], %[ptr]\n" \ + #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \ __nops(3)) \ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ [ptr] "+Q"(*(u##sz *)ptr) \ @@ -124,9 +124,16 @@ PERCPU_RW_OPS(8) PERCPU_RW_OPS(16) PERCPU_RW_OPS(32) PERCPU_RW_OPS(64) -PERCPU_OP(add, add, stadd) -PERCPU_OP(andnot, bic, stclr) -PERCPU_OP(or, orr, stset) + +/* + * Use value-returning atomics for CPU-local ops as they are more likely + * to execute "near" to the CPU (e.g. in L1$). + * + * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop + */ +PERCPU_OP(add, add, ldadd) +PERCPU_OP(andnot, bic, ldclr) +PERCPU_OP(or, orr, ldset) PERCPU_RET_OP(add, add, ldadd) #undef PERCPU_RW_OPS From eeb8c19896952e18fb538ec76e603884070a6c6a Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 31 Oct 2025 11:11:37 +0000 Subject: [PATCH 02/11] Revert "ACPI: Suppress misleading SPCR console message when SPCR table is absent" This reverts commit bad3fa2fb9206f4dcec6ddef094ec2fbf6e8dcb2. Commit bad3fa2fb920 ("ACPI: Suppress misleading SPCR console message when SPCR table is absent") mistakenly assumes acpi_parse_spcr() returning 0 to indicate a failure to parse SPCR. While addressing the resultant incorrect logging it was deemed that dropping the message is a better approach as it is not particularly useful. Roll back the commit introducing the bug as a step towards dropping the log message. Link: https://lore.kernel.org/all/aQN0YWUYaPYWpgJM@willie-the-truck/ Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7aca29e1d30b..fd164e8a35b2 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -197,8 +197,6 @@ static int __init acpi_fadt_sanity_check(void) */ void __init acpi_boot_table_init(void) { - int ret; - /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or @@ -252,12 +250,10 @@ void __init acpi_boot_table_init(void) * behaviour, use acpi=nospcr to disable console in ACPI SPCR * table as default serial console. */ - ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, + acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) - pr_info("Use ACPI SPCR as default console: No\n"); - else - pr_info("Use ACPI SPCR as default console: Yes\n"); + pr_info("Use ACPI SPCR as default console: %s\n", + param_acpi_nospcr ? "No" : "Yes"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); From 7991fda619f7251994ab364f03f3e6fc0aa143d9 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 31 Oct 2025 11:11:38 +0000 Subject: [PATCH 03/11] arm64: acpi: Drop message logging SPCR default console Commit f5a4af3c7527 ("ACPI: Add acpi=nospcr to disable ACPI SPCR as default console on ARM64") introduced a command line parameter to prevent using SPCR provided console as default. It also introduced a message to log this choice. Drop the message as it is not particularly useful and can be incorrect in situations where no SPCR is provided by the firmware. Link: https://lore.kernel.org/all/aQN0YWUYaPYWpgJM@willie-the-truck/ Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index fd164e8a35b2..c022c1acb8c7 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -252,8 +252,6 @@ void __init acpi_boot_table_init(void) */ acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - pr_info("Use ACPI SPCR as default console: %s\n", - param_acpi_nospcr ? "No" : "Yes"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); From 0ec364c0c95fc85bcbc88f1a9a06ebe83c88e18c Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 Nov 2025 13:49:47 -0800 Subject: [PATCH 04/11] arm64: kprobes: check the return value of set_memory_rox() Since commit a166563e7ec3 ("arm64: mm: support large block mapping when rodata=full"), __change_memory_common has more chance to fail due to memory allocation failure when splitting page table. So check the return value of set_memory_rox(), then bail out if it fails otherwise we may have RW memory mapping for kprobes insn page. Fixes: 195a1b7d8388 ("arm64: kprobes: call set_memory_rox() for kprobe page") Reviewed-by: Ryan Roberts Reviewed-by: Dev Jain Signed-off-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 8ab6104a4883..43a0361a8bf0 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -49,7 +49,10 @@ void *alloc_insn_page(void) addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!addr) return NULL; - set_memory_rox((unsigned long)addr, 1); + if (set_memory_rox((unsigned long)addr, 1)) { + execmem_free(addr); + return NULL; + } return addr; } From ce2b3a50ad922abbba36425343a1bcec46903a26 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 6 Nov 2025 16:09:41 +0000 Subject: [PATCH 05/11] arm64: mm: Don't sleep in split_kernel_leaf_mapping() when in atomic context It has been reported that split_kernel_leaf_mapping() is trying to sleep in non-sleepable context. It does this when acquiring the pgtable_split_lock mutex, when either CONFIG_DEBUG_PAGEALLOC or CONFIG_KFENCE are enabled, which change linear map permissions within softirq context during memory allocation and/or freeing. All other paths into this function are called from sleepable context and so are safe. But it turns out that the memory for which these 2 features may attempt to modify the permissions is always mapped by pte, so there is no need to attempt to split the mapping. So let's exit early in these cases and avoid attempting to take the mutex. There is one wrinkle to this approach; late-initialized kfence allocates it's pool from the buddy which may be block mapped. So we must hook that allocation and convert it to pte-mappings up front. Previously this was done as a side-effect of kfence protecting all the individual pages in its pool at init-time, but this no longer works due to the added early exit path in split_kernel_leaf_mapping(). So instead, do this via the existing arch_kfence_init_pool() arch hook, and reuse the existing linear_map_split_to_ptes() infrastructure. Closes: https://lore.kernel.org/all/f24b9032-0ec9-47b1-8b95-c0eeac7a31c5@roeck-us.net/ Fixes: a166563e7ec3 ("arm64: mm: support large block mapping when rodata=full") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand (Red Hat) Reviewed-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/include/asm/kfence.h | 3 +- arch/arm64/mm/mmu.c | 92 +++++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h index a81937fae9f6..21dbc9dda747 100644 --- a/arch/arm64/include/asm/kfence.h +++ b/arch/arm64/include/asm/kfence.h @@ -10,8 +10,6 @@ #include -static inline bool arch_kfence_init_pool(void) { return true; } - static inline bool kfence_protect_page(unsigned long addr, bool protect) { set_memory_valid(addr, 1, !protect); @@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void) { return !kfence_early_init; } +bool arch_kfence_init_pool(void); #else /* CONFIG_KFENCE */ static inline bool arm64_kfence_can_set_direct_map(void) { return false; } #endif /* CONFIG_KFENCE */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b8d37eb037fc..a364ac2c9c61 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -708,6 +708,16 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr) return ret; } +static inline bool force_pte_mapping(void) +{ + bool bbml2 = system_capabilities_finalized() ? + system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); + + return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || + is_realm_world())) || + debug_pagealloc_enabled(); +} + static DEFINE_MUTEX(pgtable_split_lock); int split_kernel_leaf_mapping(unsigned long start, unsigned long end) @@ -723,6 +733,16 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) if (!system_supports_bbml2_noabort()) return 0; + /* + * If the region is within a pte-mapped area, there is no need to try to + * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may + * change permissions from atomic context so for those cases (which are + * always pte-mapped), we must not go any further because taking the + * mutex below may sleep. + */ + if (force_pte_mapping() || is_kfence_address((void *)start)) + return 0; + /* * Ensure start and end are at least page-aligned since this is the * finest granularity we can split to. @@ -758,30 +778,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) return ret; } -static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pud_t pud = pudp_get(pudp); int ret = 0; if (pud_leaf(pud)) - ret = split_pud(pudp, pud, GFP_ATOMIC, false); + ret = split_pud(pudp, pud, gfp, false); return ret; } -static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pmd_t pmd = pmdp_get(pmdp); int ret = 0; if (pmd_leaf(pmd)) { if (pmd_cont(pmd)) split_contpmd(pmdp); - ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false); + ret = split_pmd(pmdp, pmd, gfp, false); /* * We have split the pmd directly to ptes so there is no need to @@ -793,9 +813,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, return ret; } -static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) { pte_t pte = __ptep_get(ptep); @@ -805,12 +824,18 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, return 0; } -static const struct mm_walk_ops split_to_ptes_ops __initconst = { +static const struct mm_walk_ops split_to_ptes_ops = { .pud_entry = split_to_ptes_pud_entry, .pmd_entry = split_to_ptes_pmd_entry, .pte_entry = split_to_ptes_pte_entry, }; +static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) +{ + return walk_kernel_page_table_range_lockless(start, end, + &split_to_ptes_ops, NULL, &gfp); +} + static bool linear_map_requires_bbml2 __initdata; u32 idmap_kpti_bbml2_flag; @@ -847,11 +872,9 @@ static int __init linear_map_split_to_ptes(void *__unused) * PTE. The kernel alias remains static throughout runtime so * can continue to be safely mapped with large mappings. */ - ret = walk_kernel_page_table_range_lockless(lstart, kstart, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC); if (!ret) - ret = walk_kernel_page_table_range_lockless(kend, lend, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(kend, lend, GFP_ATOMIC); if (ret) panic("Failed to split linear map\n"); flush_tlb_kernel_range(lstart, lend); @@ -1002,6 +1025,33 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); __kfence_pool = phys_to_virt(kfence_pool); } + +bool arch_kfence_init_pool(void) +{ + unsigned long start = (unsigned long)__kfence_pool; + unsigned long end = start + KFENCE_POOL_SIZE; + int ret; + + /* Exit early if we know the linear map is already pte-mapped. */ + if (!system_supports_bbml2_noabort() || force_pte_mapping()) + return true; + + /* Kfence pool is already pte-mapped for the early init case. */ + if (kfence_early_init) + return true; + + mutex_lock(&pgtable_split_lock); + ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL); + mutex_unlock(&pgtable_split_lock); + + /* + * Since the system supports bbml2_noabort, tlb invalidation is not + * required here; the pgtable mappings have been split to pte but larger + * entries may safely linger in the TLB. + */ + + return !ret; +} #else /* CONFIG_KFENCE */ static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } @@ -1009,16 +1059,6 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { #endif /* CONFIG_KFENCE */ -static inline bool force_pte_mapping(void) -{ - bool bbml2 = system_capabilities_finalized() ? - system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); - - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || - is_realm_world())) || - debug_pagealloc_enabled(); -} - static void __init map_mem(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); From 40a292f701474f7c21b27911677485efa233e94e Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 6 Nov 2025 16:09:42 +0000 Subject: [PATCH 06/11] arm64: mm: Optimize range_split_to_ptes() Enter lazy_mmu mode while splitting a range of memory to pte mappings. This causes barriers, which would otherwise be emitted after every pte (and pmd/pud) write, to be deferred until exiting lazy_mmu mode. For large systems, this is expected to significantly speed up fallback to pte-mapping the linear map for the case where the boot CPU has BBML2_NOABORT, but secondary CPUs do not. I haven't directly measured it, but this is equivalent to commit 1fcb7cea8a5f ("arm64: mm: Batch dsb and isb when populating pgtables"). Note that for the path from arch_kfence_init_pool(), we may sleep while allocating memory inside the lazy_mmu mode. Sleeping is not allowed by generic code inside lazy_mmu, but we know that the arm64 implementation is sleep-safe. So this is ok and follows the same pattern already used by split_kernel_leaf_mapping(). Signed-off-by: Ryan Roberts Reviewed-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a364ac2c9c61..652bb8c14035 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -832,8 +832,14 @@ static const struct mm_walk_ops split_to_ptes_ops = { static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) { - return walk_kernel_page_table_range_lockless(start, end, + int ret; + + arch_enter_lazy_mmu_mode(); + ret = walk_kernel_page_table_range_lockless(start, end, &split_to_ptes_ops, NULL, &gfp); + arch_leave_lazy_mmu_mode(); + + return ret; } static bool linear_map_requires_bbml2 __initdata; From 53357f14f924a06cced46069755bb10c2a6891c1 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 6 Nov 2025 16:09:43 +0000 Subject: [PATCH 07/11] arm64: mm: Tidy up force_pte_mapping() Tidy up the implementation of force_pte_mapping() to make it easier to read and introduce the split_leaf_mapping_possible() helper to reduce code duplication in split_kernel_leaf_mapping() and arch_kfence_init_pool(). Suggested-by: David Hildenbrand (Red Hat) Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand (Red Hat) Reviewed-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 652bb8c14035..2ba01dc8ef82 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -710,12 +710,26 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr) static inline bool force_pte_mapping(void) { - bool bbml2 = system_capabilities_finalized() ? + const bool bbml2 = system_capabilities_finalized() ? system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || - is_realm_world())) || - debug_pagealloc_enabled(); + if (debug_pagealloc_enabled()) + return true; + if (bbml2) + return false; + return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); +} + +static inline bool split_leaf_mapping_possible(void) +{ + /* + * !BBML2_NOABORT systems should never run into scenarios where we would + * have to split. So exit early and let calling code detect it and raise + * a warning. + */ + if (!system_supports_bbml2_noabort()) + return false; + return !force_pte_mapping(); } static DEFINE_MUTEX(pgtable_split_lock); @@ -725,22 +739,11 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) int ret; /* - * !BBML2_NOABORT systems should not be trying to change permissions on - * anything that is not pte-mapped in the first place. Just return early - * and let the permission change code raise a warning if not already - * pte-mapped. + * Exit early if the region is within a pte-mapped area or if we can't + * split. For the latter case, the permission change code will raise a + * warning if not already pte-mapped. */ - if (!system_supports_bbml2_noabort()) - return 0; - - /* - * If the region is within a pte-mapped area, there is no need to try to - * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may - * change permissions from atomic context so for those cases (which are - * always pte-mapped), we must not go any further because taking the - * mutex below may sleep. - */ - if (force_pte_mapping() || is_kfence_address((void *)start)) + if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) return 0; /* @@ -1039,7 +1042,7 @@ bool arch_kfence_init_pool(void) int ret; /* Exit early if we know the linear map is already pte-mapped. */ - if (!system_supports_bbml2_noabort() || force_pte_mapping()) + if (!split_leaf_mapping_possible()) return true; /* Kfence pool is already pte-mapped for the early init case. */ From 62e72463ca714073962eda450e80c5d71dfb0dcb Mon Sep 17 00:00:00 2001 From: shechenglong Date: Fri, 31 Oct 2025 17:15:05 +0800 Subject: [PATCH 08/11] arm64: proton-pack: Drop print when !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY Following the pattern established with other Spectre mitigations, do not print a message when the CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY Kconfig option is disabled. Suggested-by: Will Deacon Signed-off-by: shechenglong Signed-off-by: Will Deacon --- arch/arm64/kernel/proton-pack.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index f9a32dfde006..d833b7c1bba8 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1042,8 +1042,6 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { /* No point mitigating Spectre-BHB alone. */ - } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { - pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); } else if (cpu_mitigations_off() || __nospectre_bhb) { pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { From 7f1635737823a6c0c412ccf3767a12bec642c10f Mon Sep 17 00:00:00 2001 From: shechenglong Date: Fri, 31 Oct 2025 17:15:06 +0800 Subject: [PATCH 09/11] arm64: proton-pack: Fix hard lockup due to print in scheduler context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocate the printk() calls from spectre_v4_mitigations_off() and spectre_v2_mitigations_off() into setup_system_capabilities() function, preventing hard lockups caused by printk calls in scheduler context: | _raw_spin_lock_nested+168 | ttwu_queue+180 (rq_lock(rq, &rf); 2nd acquiring the rq->__lock) | try_to_wake_up+548 | wake_up_process+32 | __up+88 | up+100 | __up_console_sem+96 | console_unlock+696 | vprintk_emit+428 | vprintk_default+64 | vprintk_func+220 | printk+104 | spectre_v4_enable_task_mitigation+344 | __switch_to+100 | __schedule+1028 (rq_lock(rq, &rf); 1st acquiring the rq->__lock) | schedule_idle+48 | do_idle+388 | cpu_startup_entry+44 | secondary_start_kernel+352 Suggested-by: Mark Rutland Suggested-by: Catalin Marinas Suggested-by: Will Deacon Signed-off-by: shechenglong Signed-off-by: Will Deacon --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/cpufeature.c | 6 ++++++ arch/arm64/kernel/proton-pack.c | 33 +++++++++++++++++--------------- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 8fef12626090..900454aaa292 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -117,6 +117,7 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void spectre_bhb_patch_clearbhb(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_print_disabled_mitigations(void); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..e25b0f84a22d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -95,6 +95,7 @@ #include #include +#include /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; @@ -3875,6 +3876,11 @@ static void __init setup_system_capabilities(void) */ if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + + /* + * Report Spectre mitigations status. + */ + spectre_print_disabled_mitigations(); } void __init setup_system_features(void) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index d833b7c1bba8..c7d70d04c164 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param); static bool spectre_v2_mitigations_off(void) { - bool ret = __nospectre_v2 || cpu_mitigations_off(); - - if (ret) - pr_info_once("spectre-v2 mitigation disabled by command line option\n"); - - return ret; + return __nospectre_v2 || cpu_mitigations_off(); } static const char *get_bhb_affected_string(enum mitigation_state bhb_state) @@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param); */ static bool spectre_v4_mitigations_off(void) { - bool ret = cpu_mitigations_off() || - __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; - - if (ret) - pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); - - return ret; + return cpu_mitigations_off() || + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; } /* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ @@ -1042,8 +1032,6 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { /* No point mitigating Spectre-BHB alone. */ - } else if (cpu_mitigations_off() || __nospectre_bhb) { - pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { state = SPECTRE_MITIGATED; set_bit(BHB_HW, &system_bhb_mitigations); @@ -1197,3 +1185,18 @@ void unpriv_ebpf_notify(int new_state) pr_err("WARNING: %s", EBPF_WARN); } #endif + +void spectre_print_disabled_mitigations(void) +{ + /* Keep a single copy of the common message suffix to avoid duplication. */ + const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n"; + + if (spectre_v2_mitigations_off()) + pr_info("spectre-v2 %s", spectre_disabled_suffix); + + if (spectre_v4_mitigations_off()) + pr_info("spectre-v4 %s", spectre_disabled_suffix); + + if (__nospectre_bhb || cpu_mitigations_off()) + pr_info("spectre-bhb %s", spectre_disabled_suffix); +} From 6d4a0fbd34a40c9f877b136de874dc3498031309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Barna=C5=9B?= Date: Mon, 22 Sep 2025 13:04:26 +0000 Subject: [PATCH 10/11] arm64: Fail module loading if dynamic SCS patching fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disallow a module to load if SCS dynamic patching fails for its code. For module loading, instead of running a dry-run to check for patching errors, try to run patching in the first run and propagate any errors so module loading will fail. Signed-off-by: Adrian Barnaś Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/scs.h | 2 +- arch/arm64/kernel/module.c | 12 ++++++++++-- arch/arm64/kernel/pi/map_kernel.c | 2 +- arch/arm64/kernel/pi/patch-scs.c | 10 ++++++---- arch/arm64/kernel/pi/pi.h | 2 +- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index a76f9b387a26..c59f6324f2bb 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -53,7 +53,7 @@ enum { EDYNSCS_INVALID_CFA_OPCODE = 4, }; -int __pi_scs_patch(const u8 eh_frame[], int size); +int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); #endif /* __ASSEMBLY __ */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index d6d443c4a01a..01acbff8a1ae 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -495,10 +495,18 @@ int module_finalize(const Elf_Ehdr *hdr, if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); if (s) { - ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); - if (ret) + /* + * Because we can reject modules that are malformed + * so SCS patching fails, skip dry run and try to patch + * it in place. If patching fails, the module would not + * be loaded anyway. + */ + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true); + if (ret) { pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", me->name, ret); + return -ENOEXEC; + } } } diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e8ddbde31a83..659297f87cfa 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -104,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level) if (enable_scs) { scs_patch(__eh_frame_start + va_offset, - __eh_frame_end - __eh_frame_start); + __eh_frame_end - __eh_frame_start, false); asm("ic ialluis"); dynamic_scs_is_enabled = true; diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index 55d0cd64ef71..bbe7d30ed12b 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -225,7 +225,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, return 0; } -int scs_patch(const u8 eh_frame[], int size) +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run) { int code_alignment_factor = 1; bool fde_use_sdata8 = false; @@ -277,11 +277,13 @@ int scs_patch(const u8 eh_frame[], int size) } } else { ret = scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, true); + fde_use_sdata8, !skip_dry_run); if (ret) return ret; - scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, false); + + if (!skip_dry_run) + scs_handle_fde_frame(frame, code_alignment_factor, + fde_use_sdata8, false); } p += sizeof(frame->size) + frame->size; diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index 08ef9f80456b..aec3172d4003 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -27,7 +27,7 @@ extern pgd_t init_pg_dir[], init_pg_end[]; void init_feature_override(u64 boot_status, const void *fdt, int chosen); u64 kaslr_early_init(void *fdt, int chosen); void relocate_kernel(u64 offset); -int scs_patch(const u8 eh_frame[], int size); +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, From 8e8ae788964aa2573b4335026db4068540fa6a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Barna=C5=9B?= Date: Mon, 22 Sep 2025 13:04:27 +0000 Subject: [PATCH 11/11] arm64: Reject modules with internal alternative callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During module loading, check if a callback function used by the alternatives specified in the '.altinstruction' ELF section (if present) is located in core kernel .text. If not fail module loading before callback is called. Reported-by: Fanqin Cui Closes: https://lore.kernel.org/all/20250807072700.348514-1-fanqincui@163.com/ Signed-off-by: Adrian Barnaś Reviewed-by: Ard Biesheuvel [will: Folded in 'noinstr' tweak from Mark] Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 7 +++++-- arch/arm64/kernel/alternative.c | 19 ++++++++++++------- arch/arm64/kernel/module.c | 9 +++++++-- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 00d97b8a757f..51746005239b 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -26,9 +26,12 @@ void __init apply_alternatives_all(void); bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length); +int apply_alternatives_module(void *start, size_t length); #else -static inline void apply_alternatives_module(void *start, size_t length) { } +static inline int apply_alternatives_module(void *start, size_t length) +{ + return 0; +} #endif void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8ff6610af496..f5ec7e7c1d3f 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __apply_alternatives(const struct alt_region *region, - bool is_module, - unsigned long *cpucap_mask) +static int __apply_alternatives(const struct alt_region *region, + bool is_module, + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region, updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - if (ALT_HAS_CB(alt)) + if (ALT_HAS_CB(alt)) { alt_cb = ALT_REPL_PTR(alt); - else + if (is_module && !core_kernel_text((unsigned long)alt_cb)) + return -ENOEXEC; + } else { alt_cb = patch_alternative; + } alt_cb(alt, origptr, updptr, nr_inst); @@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region, bitmap_and(applied_alternatives, applied_alternatives, system_cpucaps, ARM64_NCAPS); } + + return 0; } static void __init apply_alternatives_vdso(void) @@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void) } #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length) +int apply_alternatives_module(void *start, size_t length) { struct alt_region region = { .begin = start, @@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length) bitmap_fill(all_capabilities, ARM64_NCAPS); - __apply_alternatives(®ion, true, &all_capabilities[0]); + return __apply_alternatives(®ion, true, &all_capabilities[0]); } #endif diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 01acbff8a1ae..24adb581af0e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -489,8 +489,13 @@ int module_finalize(const Elf_Ehdr *hdr, int ret; s = find_section(hdr, sechdrs, ".altinstructions"); - if (s) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (s) { + ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (ret < 0) { + pr_err("module %s: error occurred when applying alternatives\n", me->name); + return ret; + } + } if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame");