From 0a8321dde01ffdbd9455a028194d57484def59eb Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Sun, 1 Mar 2026 00:17:59 +0530 Subject: [PATCH 1/5] powerpc/mem: Move CMA reservations to arch_mm_preinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4267739cabb8 ("arch, mm: consolidate initialization of SPARSE memory model"), changed the initialization order of "pageblock_order" from... start_kernel() - setup_arch() - initmem_init() - sparse_init() - set_pageblock_order(); // this sets the pageblock_order - xxx_cma_reserve(); to... start_kernel() - setup_arch() - xxx_cma_reserve(); - mm_core_init_early() - free_area_init() - sparse_init() - set_pageblock_order() // this sets the pageblock_order. So this means, pageblock_order is not initialized before these cma reservation function calls, hence we are seeing CMA failures like... [ 0.000000] kvm_cma_reserve: reserving 3276 MiB for global area [ 0.000000] cma: pageblock_order not yet initialized. Called during early boot? [ 0.000000] cma: Failed to reserve 3276 MiB .... [ 0.000000][ T0] cma: pageblock_order not yet initialized. Called during early boot? [ 0.000000][ T0] cma: Failed to reserve 1024 MiB This patch moves these CMA reservations to arch_mm_preinit() which happens in mm_core_init() (which happens after pageblock_order is initialized), but before the memblock moves the free memory to buddy. Fixes: 4267739cabb8 ("arch, mm: consolidate initialization of SPARSE memory model") Suggested-by: Mike Rapoport Reported-and-tested-by: Sourabh Jain Closes: https://lore.kernel.org/linuxppc-dev/4c338a29-d190-44f3-8874-6cfa0a031f0b@linux.ibm.com/ Signed-off-by: Ritesh Harjani (IBM) Tested-by: Dan HorĂ¡k Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/6e532cf0db5be99afbe20eed699163d5e86cd71f.1772303986.git.ritesh.list@gmail.com --- arch/powerpc/kernel/setup-common.c | 10 ---------- arch/powerpc/mm/mem.c | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index cb5b73adc250..b1761909c23f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -995,15 +994,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(); - /* - * Reserve large chunks of memory for use by CMA for kdump, fadump, KVM and - * hugetlb. These must be called after initmem_init(), so that - * pageblock_order is initialised. - */ - fadump_cma_init(); - kdump_cma_reserve(); - kvm_cma_reserve(); - early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); if (ppc_md.setup_arch) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a985fc96b953..b7982d0243d4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -30,6 +30,10 @@ #include #include +#include +#include +#include + #include unsigned long long memory_limit __initdata; @@ -268,6 +272,16 @@ void __init paging_init(void) void __init arch_mm_preinit(void) { + + /* + * Reserve large chunks of memory for use by CMA for kdump, fadump, KVM + * and hugetlb. These must be called after pageblock_order is + * initialised. + */ + fadump_cma_init(); + kdump_cma_reserve(); + kvm_cma_reserve(); + /* * book3s is limited to 16 page sizes due to encoding this in * a 4-bit field for slices. From e9bbfb4bfa86c6b5515b868d6982ac60505d7e39 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Mon, 9 Mar 2026 15:40:45 +0100 Subject: [PATCH 2/5] powerpc, perf: Check that current->mm is alive before getting user callchain It may happen that mm is already released, which leads to kernel panic. This adds the NULL check for current->mm, similarly to commit 20afc60f892d ("x86, perf: Check that current->mm is alive before getting user callchain"). I was getting this panic when running a profiling BPF program (profile.py from bcc-tools): [26215.051935] Kernel attempted to read user page (588) - exploit attempt? (uid: 0) [26215.051950] BUG: Kernel NULL pointer dereference on read at 0x00000588 [26215.051952] Faulting instruction address: 0xc00000000020fac0 [26215.051957] Oops: Kernel access of bad area, sig: 11 [#1] [...] [26215.052049] Call Trace: [26215.052050] [c000000061da6d30] [c00000000020fc10] perf_callchain_user_64+0x2d0/0x490 (unreliable) [26215.052054] [c000000061da6dc0] [c00000000020f92c] perf_callchain_user+0x1c/0x30 [26215.052057] [c000000061da6de0] [c0000000005ab2a0] get_perf_callchain+0x100/0x360 [26215.052063] [c000000061da6e70] [c000000000573bc8] bpf_get_stackid+0x88/0xf0 [26215.052067] [c000000061da6ea0] [c008000000042258] bpf_prog_16d4ab9ab662f669_do_perf_event+0xf8/0x274 [...] In addition, move storing the top-level stack entry to generic perf_callchain_user to make sure the top-evel entry is always captured, even if current->mm is NULL. Fixes: 20002ded4d93 ("perf_counter: powerpc: Add callchain support") Signed-off-by: Viktor Malik Tested-by: Qiao Zhao Tested-by: Venkat Rao Bagalkote Reviewed-by: Saket Kumar Bhaskar [Maddy: fixed message to avoid checkpatch format style error] Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260309144045.169427-1-vmalik@redhat.com --- arch/powerpc/perf/callchain.c | 5 +++++ arch/powerpc/perf/callchain_32.c | 1 - arch/powerpc/perf/callchain_64.c | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 26aa26482c9a..992cc5c98214 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -103,6 +103,11 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + perf_callchain_store(entry, perf_arch_instruction_pointer(regs)); + + if (!current->mm) + return; + if (!is_32bit_task()) perf_callchain_user_64(entry, regs); else diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index ddcc2d8aa64a..0de21c5d272c 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -142,7 +142,6 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); while (entry->nr < entry->max_stack) { fp = (unsigned int __user *) (unsigned long) sp; diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 115d1c105e8a..30fb61c5f0cb 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -77,7 +77,6 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); while (entry->nr < entry->max_stack) { fp = (unsigned long __user *) sp; From 6bc9c0a905228bea5c53ec195fe54f5f0233dccc Mon Sep 17 00:00:00 2001 From: Sayali Patil Date: Wed, 4 Mar 2026 17:52:00 +0530 Subject: [PATCH 3/5] powerpc: fix KUAP warning in VMX usercopy path On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing enabled, KUAP warnings can be triggered from the VMX usercopy path under memory stress workloads. KUAP requires that no subfunctions are called once userspace access has been enabled. The existing VMX copy implementation violates this requirement by invoking enter_vmx_usercopy() from the assembly path after userspace access has already been enabled. If preemption occurs in this window, the AMR state may not be preserved correctly, leading to unexpected userspace access state and resulting in KUAP warnings. Fix this by restructuring the VMX usercopy flow so that VMX selection and VMX state management are centralized in raw_copy_tofrom_user(), which is invoked by the raw_copy_{to,from,in}_user() wrappers. The new flow is: - raw_copy_{to,from,in}_user() calls raw_copy_tofrom_user() - raw_copy_tofrom_user() decides whether to use the VMX path based on size and CPU capability - Call enter_vmx_usercopy() before enabling userspace access - Enable userspace access as per the copy direction and perform the VMX copy - Disable userspace access as per the copy direction - Call exit_vmx_usercopy() - Fall back to the base copy routine if the VMX copy faults With this change, the VMX assembly routines no longer perform VMX state management or call helper functions; they only implement the copy operations. The previous feature-section based VMX selection inside __copy_tofrom_user_power7() is removed, and a dedicated __copy_tofrom_user_power7_vmx() entry point is introduced. This ensures correct KUAP ordering, avoids subfunction calls while KUAP is unlocked, and eliminates the warnings while preserving the VMX fast path. Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection") Reported-by: Shrikanth Hegde Closes: https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/ Suggested-by: Christophe Leroy (CS GROUP) Reviewed-by: Christophe Leroy (CS GROUP) Co-developed-by: Aboorva Devarajan Signed-off-by: Aboorva Devarajan Signed-off-by: Sayali Patil Tested-by: Shrikanth Hegde Tested-by: Venkat Rao Bagalkote Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260304122201.153049-1-sayalip@linux.ibm.com --- arch/powerpc/include/asm/uaccess.h | 69 ++++++++++++++++++++---------- arch/powerpc/lib/copyuser_64.S | 1 + arch/powerpc/lib/copyuser_power7.S | 45 +++++++------------ arch/powerpc/lib/vmx-helper.c | 2 + 4 files changed, 65 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 570b3d91e2e4..17e63244e885 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -15,6 +15,9 @@ #define TASK_SIZE_MAX TASK_SIZE_USER64 #endif +/* Threshold above which VMX copy path is used */ +#define VMX_COPY_THRESHOLD 3328 + #include /* @@ -326,40 +329,62 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifdef __powerpc64__ +unsigned long __copy_tofrom_user_base(void __user *to, + const void __user *from, unsigned long size); + +unsigned long __copy_tofrom_user_power7_vmx(void __user *to, + const void __user *from, unsigned long size); + +static __always_inline bool will_use_vmx(unsigned long n) +{ + return IS_ENABLED(CONFIG_ALTIVEC) && cpu_has_feature(CPU_FTR_VMX_COPY) && + n > VMX_COPY_THRESHOLD; +} + +static __always_inline unsigned long +raw_copy_tofrom_user(void __user *to, const void __user *from, + unsigned long n, unsigned long dir) +{ + unsigned long ret; + + if (will_use_vmx(n) && enter_vmx_usercopy()) { + allow_user_access(to, dir); + ret = __copy_tofrom_user_power7_vmx(to, from, n); + prevent_user_access(dir); + exit_vmx_usercopy(); + + if (unlikely(ret)) { + allow_user_access(to, dir); + ret = __copy_tofrom_user_base(to, from, n); + prevent_user_access(dir); + } + return ret; + } + + allow_user_access(to, dir); + ret = __copy_tofrom_user(to, from, n); + prevent_user_access(dir); + return ret; +} + +#ifdef CONFIG_PPC64 static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { - unsigned long ret; - barrier_nospec(); - allow_user_access(to, KUAP_READ_WRITE); - ret = __copy_tofrom_user(to, from, n); - prevent_user_access(KUAP_READ_WRITE); - return ret; + return raw_copy_tofrom_user(to, from, n, KUAP_READ_WRITE); } -#endif /* __powerpc64__ */ +#endif /* CONFIG_PPC64 */ -static inline unsigned long raw_copy_from_user(void *to, - const void __user *from, unsigned long n) +static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long ret; - - allow_user_access(NULL, KUAP_READ); - ret = __copy_tofrom_user((__force void __user *)to, from, n); - prevent_user_access(KUAP_READ); - return ret; + return raw_copy_tofrom_user((__force void __user *)to, from, n, KUAP_READ); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long ret; - - allow_user_access(to, KUAP_WRITE); - ret = __copy_tofrom_user(to, (__force const void __user *)from, n); - prevent_user_access(KUAP_WRITE); - return ret; + return raw_copy_tofrom_user(to, (__force const void __user *)from, n, KUAP_WRITE); } unsigned long __arch_clear_user(void __user *addr, unsigned long size); diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 9af969d2cc0c..25a99108caff 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -562,3 +562,4 @@ exc; std r10,32(3) li r5,4096 b .Ldst_aligned EXPORT_SYMBOL(__copy_tofrom_user) +EXPORT_SYMBOL(__copy_tofrom_user_base) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 8474c682a178..17dbcfbae25f 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -5,13 +5,9 @@ * * Author: Anton Blanchard */ +#include #include -#ifndef SELFTEST_CASE -/* 0 == don't use VMX, 1 == use VMX */ -#define SELFTEST_CASE 0 -#endif - #ifdef __BIG_ENDIAN__ #define LVS(VRT,RA,RB) lvsl VRT,RA,RB #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC @@ -47,10 +43,14 @@ ld r15,STK_REG(R15)(r1) ld r14,STK_REG(R14)(r1) .Ldo_err3: - bl CFUNC(exit_vmx_usercopy) + ld r6,STK_REG(R31)(r1) /* original destination pointer */ + ld r5,STK_REG(R29)(r1) /* original number of bytes */ + subf r7,r6,r3 /* #bytes copied */ + subf r3,r7,r5 /* #bytes not copied in r3 */ ld r0,STACKFRAMESIZE+16(r1) mtlr r0 - b .Lexit + addi r1,r1,STACKFRAMESIZE + blr #endif /* CONFIG_ALTIVEC */ .Ldo_err2: @@ -74,7 +74,6 @@ _GLOBAL(__copy_tofrom_user_power7) cmpldi r5,16 - cmpldi cr1,r5,3328 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) @@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7) blt .Lshort_copy -#ifdef CONFIG_ALTIVEC -test_feature = SELFTEST_CASE -BEGIN_FTR_SECTION - bgt cr1,.Lvmx_copy -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif .Lnonvmx_copy: /* Get the source 8B aligned */ @@ -263,23 +256,14 @@ err1; stb r0,0(r3) 15: li r3,0 blr -.Lunwind_stack_nonvmx_copy: - addi r1,r1,STACKFRAMESIZE - b .Lnonvmx_copy - -.Lvmx_copy: #ifdef CONFIG_ALTIVEC +_GLOBAL(__copy_tofrom_user_power7_vmx) mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) - bl CFUNC(enter_vmx_usercopy) - cmpwi cr1,r3,0 - ld r0,STACKFRAMESIZE+16(r1) - ld r3,STK_REG(R31)(r1) - ld r4,STK_REG(R30)(r1) - ld r5,STK_REG(R29)(r1) - mtlr r0 + std r3,STK_REG(R31)(r1) + std r5,STK_REG(R29)(r1) /* * We prefetch both the source and destination using enhanced touch * instructions. We use a stream ID of 0 for the load side and @@ -300,8 +284,6 @@ err1; stb r0,0(r3) DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8) - beq cr1,.Lunwind_stack_nonvmx_copy - /* * If source and destination are not relatively aligned we use a * slower permute loop. @@ -478,7 +460,8 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b CFUNC(exit_vmx_usercopy) /* tail call optimise */ + li r3,0 + blr .Lvmx_unaligned_copy: /* Get the destination 16B aligned */ @@ -681,5 +664,7 @@ err3; lbz r0,0(r4) err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - b CFUNC(exit_vmx_usercopy) /* tail call optimise */ + li r3,0 + blr +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx) #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 54340912398f..554b248002b4 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -27,6 +27,7 @@ int enter_vmx_usercopy(void) return 1; } +EXPORT_SYMBOL(enter_vmx_usercopy); /* * This function must return 0 because we tail call optimise when calling @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void) set_dec(1); return 0; } +EXPORT_SYMBOL(exit_vmx_usercopy); int enter_vmx_ops(void) { From 146c9ab38b48004b40735b6c1e1c2b5adf6436f9 Mon Sep 17 00:00:00 2001 From: Sayali Patil Date: Wed, 4 Mar 2026 17:52:01 +0530 Subject: [PATCH 4/5] powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx The new PowerPC VMX fast path (__copy_tofrom_user_power7_vmx) is not exercised by existing copyloops selftests. This patch updates the selftest to exercise the VMX variant, ensuring the VMX copy path is validated. Changes include: - COPY_LOOP=test___copy_tofrom_user_power7_vmx with -D VMX_TEST is used in existing selftest build targets. - Inclusion of ../utils.c to provide get_auxv_entry() for hardware feature detection. - At runtime, the test skips execution if Altivec is not available. - Copy sizes above VMX_COPY_THRESHOLD are used to ensure the VMX path is taken. This enables validation of the VMX fast path without affecting systems that do not support Altivec. Signed-off-by: Sayali Patil Tested-by: Venkat Rao Bagalkote Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260304122201.153049-2-sayalip@linux.ibm.com --- .../selftests/powerpc/copyloops/.gitignore | 4 ++-- .../testing/selftests/powerpc/copyloops/Makefile | 11 ++++++++--- tools/testing/selftests/powerpc/copyloops/stubs.S | 8 -------- .../selftests/powerpc/copyloops/validate.c | 15 ++++++++++++++- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 7283e8b07b75..80d4270a71ac 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -2,8 +2,8 @@ copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 -copyuser_p7_t0 -copyuser_p7_t1 +copyuser_p7 +copyuser_p7_vmx memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 42940f92d832..0c8efb0bddeb 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ - copyuser_p7_t0 copyuser_p7_t1 \ + copyuser_p7 copyuser_p7_vmx \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \ @@ -28,10 +28,15 @@ $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) -D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \ -o $@ $^ -$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES) +$(OUTPUT)/copyuser_p7: copyuser_power7.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ -D COPY_LOOP=test___copy_tofrom_user_power7 \ - -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \ + -o $@ $^ + +$(OUTPUT)/copyuser_p7_vmx: copyuser_power7.S $(EXTRA_SOURCES) ../utils.c + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test___copy_tofrom_user_power7_vmx \ + -D VMX_TEST \ -o $@ $^ # Strictly speaking, we only need the memcpy_64 test cases for big-endian diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S index ec8bcf2bf1c2..3a9cb8c9a3ee 100644 --- a/tools/testing/selftests/powerpc/copyloops/stubs.S +++ b/tools/testing/selftests/powerpc/copyloops/stubs.S @@ -1,13 +1,5 @@ #include -FUNC_START(enter_vmx_usercopy) - li r3,1 - blr - -FUNC_START(exit_vmx_usercopy) - li r3,0 - blr - FUNC_START(enter_vmx_ops) li r3,1 blr diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c index 0f6873618552..fb822534fbe9 100644 --- a/tools/testing/selftests/powerpc/copyloops/validate.c +++ b/tools/testing/selftests/powerpc/copyloops/validate.c @@ -12,6 +12,10 @@ #define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE) #define POISON 0xa5 +#ifdef VMX_TEST +#define VMX_COPY_THRESHOLD 3328 +#endif + unsigned long COPY_LOOP(void *to, const void *from, unsigned long size); static void do_one(char *src, char *dst, unsigned long src_off, @@ -81,8 +85,12 @@ int test_copy_loop(void) /* Fill with sequential bytes */ for (i = 0; i < BUFLEN; i++) fill[i] = i & 0xff; - +#ifdef VMX_TEST + /* Force sizes above kernel VMX threshold (3328) */ + for (len = VMX_COPY_THRESHOLD + 1; len < MAX_LEN; len++) { +#else for (len = 1; len < MAX_LEN; len++) { +#endif for (src_off = 0; src_off < MAX_OFFSET; src_off++) { for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) { do_one(src, dst, src_off, dst_off, len, @@ -96,5 +104,10 @@ int test_copy_loop(void) int main(void) { +#ifdef VMX_TEST + /* Skip if Altivec not present */ + SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC), "ALTIVEC not supported"); +#endif + return test_harness(test_copy_loop, str(COPY_LOOP)); } From 82f73ef9c41e0623e0a8bdce4fa44a7237709f0c Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 10 Mar 2026 13:51:24 +0530 Subject: [PATCH 5/5] powerpc/iommu: fix lockdep warning during PCI enumeration Commit a75b2be249d6 ("iommu: Add iommu_driver_get_domain_for_dev() helper") introduced iommu_driver_get_domain_for_dev() for driver code paths that hold iommu_group->mutex while attaching a device to an IOMMU domain. The same commit also added a lockdep assertion in iommu_get_domain_for_dev() to ensure that callers do not hold iommu_group->mutex when invoking it. On powerpc platforms, when PCI device ownership is switched from BLOCKED to the PLATFORM domain, the attach callback spapr_tce_platform_iommu_attach_dev() still calls iommu_get_domain_for_dev(). This happens while iommu_group->mutex is held during domain switching, which triggers the lockdep warning below during PCI enumeration: WARNING: drivers/iommu/iommu.c:2252 at iommu_get_domain_for_dev+0x38/0x80, CPU#2: swapper/0/1 Modules linked in: CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Not tainted 7.0.0-rc2+ #35 PREEMPT Hardware name: IBM,9105-22A Power11 (architected) 0x820200 0xf000007 of:IBM,FW1120.00 (RB1120_115) hv:phyp pSeries NIP: c000000000c244c4 LR: c00000000005b5a4 CTR: c00000000005b578 REGS: c00000000a7bf280 TRAP: 0700 Not tainted (7.0.0-rc2+) MSR: 8000000002029033 CR: 22004422 XER: 0000000a CFAR: c000000000c24508 IRQMASK: 0 GPR00: c00000000005b5a4 c00000000a7bf520 c000000001dc8100 0000000000000001 GPR04: c00000000f972f10 0000000000000000 0000000000000000 0000000000000001 GPR08: 0000001ffbc60000 0000000000000001 0000000000000000 0000000000000000 GPR12: c00000000005b578 c000001fffffe480 c000000000011618 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: ffffffffffffefff 0000000000000000 c000000002d30eb0 0000000000000001 GPR24: c0000000017881f8 0000000000000000 0000000000000001 c00000000f972e00 GPR28: c00000000bbba0d0 0000000000000000 c00000000bbba0d0 c00000000f972e00 NIP [c000000000c244c4] iommu_get_domain_for_dev+0x38/0x80 LR [c00000000005b5a4] spapr_tce_platform_iommu_attach_dev+0x2c/0x98 Call Trace: iommu_get_domain_for_dev+0x68/0x80 (unreliable) spapr_tce_platform_iommu_attach_dev+0x2c/0x98 __iommu_attach_device+0x44/0x220 __iommu_device_set_domain+0xf4/0x194 __iommu_group_set_domain_internal+0xec/0x228 iommu_setup_default_domain+0x5f4/0x6a4 __iommu_probe_device+0x674/0x724 iommu_probe_device+0x50/0xb4 iommu_add_device+0x48/0x198 pci_dma_dev_setup_pSeriesLP+0x198/0x4f0 pcibios_bus_add_device+0x80/0x464 pci_bus_add_device+0x40/0x100 pci_bus_add_devices+0x54/0xb0 pcibios_init+0xd8/0x140 do_one_initcall+0x8c/0x598 kernel_init_freeable+0x3ec/0x850 kernel_init+0x34/0x270 ret_from_kernel_user_thread+0x14/0x1c Fix this by using iommu_driver_get_domain_for_dev() instead of iommu_get_domain_for_dev() in spapr_tce_platform_iommu_attach_dev(), which is the appropriate helper for callers holding the group mutex. Cc: stable@vger.kernel.org Fixes: a75b2be249d6 ("iommu: Add iommu_driver_get_domain_for_dev() helper") Closes: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/d5c834ff-4c95-44dd-8bef-57242d63aeee@linux.ibm.com/ Signed-off-by: Nilay Shroff Reviewed-by: Nicolin Chen Tested-by: Venkat Rao Bagalkote [Maddy: Added Closes, tested and reviewed by tags] Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260310082129.3630996-1-nilay@linux.ibm.com --- arch/powerpc/kernel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0ce71310b7d9..d122e8447831 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1159,7 +1159,7 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct device *dev, struct iommu_domain *old) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain = iommu_driver_get_domain_for_dev(dev); struct iommu_table_group *table_group; struct iommu_group *grp;