From 07d5798aadfa922af1f81bf7fe4a1746a7e6d184 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 10 Feb 2026 19:31:06 +0800 Subject: [PATCH 01/20] LoongArch: Select HAVE_CMPXCHG_LOCAL in Kconfig LoongArch has already implemented cmpxchg_local(), this_cpu_cmpxchg() and similar functions, so select HAVE_CMPXCHG_LOCAL in Kconfig to avoid incurring the overhead of local_irq_save()/local_irq_restore() for page state helpers in mm/vmstat.c. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 730f34214519..ae9d954aa052 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -130,6 +130,7 @@ config LOONGARCH select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select HAVE_ASM_MODVERSIONS + select HAVE_CMPXCHG_LOCAL select HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK From 48543c4283e76d561d11b9955222b1a3054abdb9 Mon Sep 17 00:00:00 2001 From: George Guo Date: Tue, 10 Feb 2026 19:31:06 +0800 Subject: [PATCH 02/20] LoongArch: Add detection for SC.Q support Check the CPUCFG2_SCQ bit to determine if the current CPU supports the SC.Q instruction. 
Reviewed-by: Hengqi Chen Tested-by: Hengqi Chen Co-developed-by: Yangyang Lian Signed-off-by: Yangyang Lian Signed-off-by: George Guo Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 1 + arch/loongarch/include/asm/cpu.h | 62 ++++++++++++----------- arch/loongarch/include/uapi/asm/hwcap.h | 1 + arch/loongarch/kernel/cpu-probe.c | 4 ++ arch/loongarch/kernel/proc.c | 1 + 5 files changed, 39 insertions(+), 30 deletions(-) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 3745d991a99a..8eefe7a2098b 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -35,6 +35,7 @@ */ #define cpu_has_cpucfg cpu_opt(LOONGARCH_CPU_CPUCFG) #define cpu_has_lam cpu_opt(LOONGARCH_CPU_LAM) +#define cpu_has_scq cpu_opt(LOONGARCH_CPU_SCQ) #define cpu_has_ual cpu_opt(LOONGARCH_CPU_UAL) #define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU) #define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX) diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index f3efb00b6141..1e60ab264cd0 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -95,39 +95,41 @@ static inline char *id_to_core_name(unsigned int id) */ #define CPU_FEATURE_CPUCFG 0 /* CPU has CPUCFG */ #define CPU_FEATURE_LAM 1 /* CPU has Atomic instructions */ -#define CPU_FEATURE_UAL 2 /* CPU supports unaligned access */ -#define CPU_FEATURE_FPU 3 /* CPU has FPU */ -#define CPU_FEATURE_LSX 4 /* CPU has LSX (128-bit SIMD) */ -#define CPU_FEATURE_LASX 5 /* CPU has LASX (256-bit SIMD) */ -#define CPU_FEATURE_CRC32 6 /* CPU has CRC32 instructions */ -#define CPU_FEATURE_COMPLEX 7 /* CPU has Complex instructions */ -#define CPU_FEATURE_CRYPTO 8 /* CPU has Crypto instructions */ -#define CPU_FEATURE_LVZ 9 /* CPU has Virtualization extension */ -#define CPU_FEATURE_LBT_X86 10 /* CPU has X86 Binary Translation */ -#define CPU_FEATURE_LBT_ARM 11 /* CPU has ARM Binary 
Translation */ -#define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */ -#define CPU_FEATURE_TLB 13 /* CPU has TLB */ -#define CPU_FEATURE_CSR 14 /* CPU has CSR */ -#define CPU_FEATURE_IOCSR 15 /* CPU has IOCSR */ -#define CPU_FEATURE_WATCH 16 /* CPU has watchpoint registers */ -#define CPU_FEATURE_VINT 17 /* CPU has vectored interrupts */ -#define CPU_FEATURE_CSRIPI 18 /* CPU has CSR-IPI */ -#define CPU_FEATURE_EXTIOI 19 /* CPU has EXT-IOI */ -#define CPU_FEATURE_PREFETCH 20 /* CPU has prefetch instructions */ -#define CPU_FEATURE_PMP 21 /* CPU has perfermance counter */ -#define CPU_FEATURE_SCALEFREQ 22 /* CPU supports cpufreq scaling */ -#define CPU_FEATURE_FLATMODE 23 /* CPU has flat mode */ -#define CPU_FEATURE_EIODECODE 24 /* CPU has EXTIOI interrupt pin decode mode */ -#define CPU_FEATURE_GUESTID 25 /* CPU has GuestID feature */ -#define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ -#define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ -#define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ -#define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */ -#define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */ -#define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */ +#define CPU_FEATURE_SCQ 2 /* CPU has SC.Q instruction */ +#define CPU_FEATURE_UAL 3 /* CPU supports unaligned access */ +#define CPU_FEATURE_FPU 4 /* CPU has FPU */ +#define CPU_FEATURE_LSX 5 /* CPU has LSX (128-bit SIMD) */ +#define CPU_FEATURE_LASX 6 /* CPU has LASX (256-bit SIMD) */ +#define CPU_FEATURE_CRC32 7 /* CPU has CRC32 instructions */ +#define CPU_FEATURE_COMPLEX 8 /* CPU has Complex instructions */ +#define CPU_FEATURE_CRYPTO 9 /* CPU has Crypto instructions */ +#define CPU_FEATURE_LVZ 10 /* CPU has Virtualization extension */ +#define CPU_FEATURE_LBT_X86 11 /* CPU has X86 Binary Translation */ +#define CPU_FEATURE_LBT_ARM 12 /* CPU has ARM Binary Translation */ +#define CPU_FEATURE_LBT_MIPS 13 /* CPU 
has MIPS Binary Translation */ +#define CPU_FEATURE_TLB 14 /* CPU has TLB */ +#define CPU_FEATURE_CSR 15 /* CPU has CSR */ +#define CPU_FEATURE_IOCSR 16 /* CPU has IOCSR */ +#define CPU_FEATURE_WATCH 17 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 18 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 19 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 20 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 21 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 22 /* CPU has perfermance counter */ +#define CPU_FEATURE_SCALEFREQ 23 /* CPU supports cpufreq scaling */ +#define CPU_FEATURE_FLATMODE 24 /* CPU has flat mode */ +#define CPU_FEATURE_EIODECODE 25 /* CPU has EXTIOI interrupt pin decode mode */ +#define CPU_FEATURE_GUESTID 26 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 27 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 28 /* CPU has hardware page table walker */ +#define CPU_FEATURE_LSPW 29 /* CPU has LSPW (lddir/ldpte instructions) */ +#define CPU_FEATURE_MSGINT 30 /* CPU has MSG interrupt */ +#define CPU_FEATURE_AVECINT 31 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_REDIRECTINT 32 /* CPU has interrupt remapping */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) +#define LOONGARCH_CPU_SCQ BIT_ULL(CPU_FEATURE_SCQ) #define LOONGARCH_CPU_UAL BIT_ULL(CPU_FEATURE_UAL) #define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU) #define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX) diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 2b34e56cfa9e..49519b4362c6 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -18,5 +18,6 @@ #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) #define HWCAP_LOONGARCH_PTW (1 << 13) #define HWCAP_LOONGARCH_LSPW (1 << 14) +#define HWCAP_LOONGARCH_SCQ (1 << 15) #endif /* _UAPI_ASM_HWCAP_H */ diff --git 
a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 08a227034042..657bbae6c1c7 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -177,6 +177,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_LAM; elf_hwcap |= HWCAP_LOONGARCH_LAM; } + if (config & CPUCFG2_SCQ) { + c->options |= LOONGARCH_CPU_SCQ; + elf_hwcap |= HWCAP_LOONGARCH_SCQ; + } if (config & CPUCFG2_FP) { c->options |= LOONGARCH_CPU_FPU; elf_hwcap |= HWCAP_LOONGARCH_FPU; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index a8800d20e11b..a60471b96440 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -62,6 +62,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "Features\t\t:"); if (cpu_has_cpucfg) seq_printf(m, " cpucfg"); if (cpu_has_lam) seq_printf(m, " lam"); + if (cpu_has_scq) seq_printf(m, " scq"); if (cpu_has_ual) seq_printf(m, " ual"); if (cpu_has_fpu) seq_printf(m, " fpu"); if (cpu_has_lsx) seq_printf(m, " lsx"); From f0e4b1b6e295299f5c9c79ad546dedbdf7132f45 Mon Sep 17 00:00:00 2001 From: George Guo Date: Tue, 10 Feb 2026 19:31:12 +0800 Subject: [PATCH 03/20] LoongArch: Add 128-bit atomic cmpxchg support Implement 128-bit atomic compare-and-exchange using LoongArch's LL.D/SC.Q instructions. At the same time, this fixes the BPF scheduler test failures (scx_central and scx_qmap) caused by kmalloc_nolock_noprof() returning NULL, due to missing 128-bit atomics. The NULL returns lead to -ENOMEM errors during scheduler initialization, causing test cases to fail. Verified by testing with the scx_qmap scheduler (located in tools/sched_ext/). Building with `make` and running ./tools/sched_ext/build/bin/scx_qmap. 
Link: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/commit/?id=5fb750e8a9ae Acked-by: Hengqi Chen Tested-by: Hengqi Chen Co-developed-by: Xi Ruoyao Signed-off-by: Xi Ruoyao Signed-off-by: George Guo Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 ++ arch/loongarch/include/asm/cmpxchg.h | 54 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ae9d954aa052..bf5a5beab366 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -114,6 +114,7 @@ config LOONGARCH select GENERIC_TIME_VSYSCALL select GPIOLIB select HAS_IOPORT + select HAVE_ALIGNED_STRUCT_PAGE select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE select HAVE_ARCH_JUMP_LABEL @@ -130,6 +131,7 @@ config LOONGARCH select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select HAVE_ASM_MODVERSIONS + select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 0494c2ab553e..58cabab6d90d 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -8,6 +8,7 @@ #include #include #include +#include #define __xchg_amo_asm(amswap_db, m, val) \ ({ \ @@ -236,6 +237,59 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ arch_cmpxchg((ptr), (o), (n)); \ }) + +union __u128_halves { + u128 full; + struct { + u64 low; + u64 high; + }; +}; + +#define system_has_cmpxchg128() cpu_opt(LOONGARCH_CPU_SCQ) + +#define __arch_cmpxchg128(ptr, old, new, llsc_mb) \ +({ \ + union __u128_halves __old, __new, __ret; \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + \ + __old.full = (old); \ + __new.full = (new); \ + \ + __asm__ __volatile__( \ + "1: ll.d %0, %3 # 128-bit cmpxchg low \n" \ + llsc_mb \ + " ld.d %1, %4 # 128-bit cmpxchg high \n" 
\ + " move $t0, %0 \n" \ + " move $t1, %1 \n" \ + " bne %0, %z5, 2f \n" \ + " bne %1, %z6, 2f \n" \ + " move $t0, %z7 \n" \ + " move $t1, %z8 \n" \ + "2: sc.q $t0, $t1, %2 \n" \ + " beqz $t0, 1b \n" \ + llsc_mb \ + : "=&r" (__ret.low), "=&r" (__ret.high) \ + : "r" (__ptr), \ + "ZC" (__ptr[0]), "m" (__ptr[1]), \ + "Jr" (__old.low), "Jr" (__old.high), \ + "Jr" (__new.low), "Jr" (__new.high) \ + : "t0", "t1", "memory"); \ + \ + __ret.full; \ +}) + +#define arch_cmpxchg128(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 16); \ + __arch_cmpxchg128(ptr, o, n, __WEAK_LLSC_MB); \ +}) + +#define arch_cmpxchg128_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 16); \ + __arch_cmpxchg128(ptr, o, n, ""); \ +}) #else #include #define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n)) From 52c1dbf4cb8e9c1df2a911bc8938efaeff51dfbb Mon Sep 17 00:00:00 2001 From: George Guo Date: Tue, 10 Feb 2026 19:31:12 +0800 Subject: [PATCH 04/20] LoongArch: Replace seq_printf() with seq_puts() for simple strings Fix warnings like: "Prefer seq_puts to seq_printf" by checkpatch.pl. Replace seq_printf() calls with seq_puts() in show_cpuinfo() when outputting simple constant strings without format specifiers. This improves performance slightly as seq_puts() avoids parsing the format string. 
Signed-off-by: George Guo Signed-off-by: Huacai Chen --- arch/loongarch/kernel/proc.c | 64 ++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index a60471b96440..a8127e83da65 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -50,33 +50,49 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "Address Sizes\t\t: %d bits physical, %d bits virtual\n", cpu_pabits + 1, cpu_vabits + 1); - seq_printf(m, "ISA\t\t\t:"); + seq_puts(m, "ISA\t\t\t:"); if (isa & LOONGARCH_CPU_ISA_LA32R) - seq_printf(m, " loongarch32r"); + seq_puts(m, " loongarch32r"); if (isa & LOONGARCH_CPU_ISA_LA32S) - seq_printf(m, " loongarch32s"); + seq_puts(m, " loongarch32s"); if (isa & LOONGARCH_CPU_ISA_LA64) - seq_printf(m, " loongarch64"); - seq_printf(m, "\n"); + seq_puts(m, " loongarch64"); + seq_puts(m, "\n"); - seq_printf(m, "Features\t\t:"); - if (cpu_has_cpucfg) seq_printf(m, " cpucfg"); - if (cpu_has_lam) seq_printf(m, " lam"); - if (cpu_has_scq) seq_printf(m, " scq"); - if (cpu_has_ual) seq_printf(m, " ual"); - if (cpu_has_fpu) seq_printf(m, " fpu"); - if (cpu_has_lsx) seq_printf(m, " lsx"); - if (cpu_has_lasx) seq_printf(m, " lasx"); - if (cpu_has_crc32) seq_printf(m, " crc32"); - if (cpu_has_complex) seq_printf(m, " complex"); - if (cpu_has_crypto) seq_printf(m, " crypto"); - if (cpu_has_ptw) seq_printf(m, " ptw"); - if (cpu_has_lspw) seq_printf(m, " lspw"); - if (cpu_has_lvz) seq_printf(m, " lvz"); - if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86"); - if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm"); - if (cpu_has_lbt_mips) seq_printf(m, " lbt_mips"); - seq_printf(m, "\n"); + seq_puts(m, "Features\t\t:"); + if (cpu_has_cpucfg) + seq_puts(m, " cpucfg"); + if (cpu_has_lam) + seq_puts(m, " lam"); + if (cpu_has_scq) + seq_puts(m, " scq"); + if (cpu_has_ual) + seq_puts(m, " ual"); + if (cpu_has_fpu) + seq_puts(m, " fpu"); + if (cpu_has_lsx) + 
seq_puts(m, " lsx"); + if (cpu_has_lasx) + seq_puts(m, " lasx"); + if (cpu_has_crc32) + seq_puts(m, " crc32"); + if (cpu_has_complex) + seq_puts(m, " complex"); + if (cpu_has_crypto) + seq_puts(m, " crypto"); + if (cpu_has_ptw) + seq_puts(m, " ptw"); + if (cpu_has_lspw) + seq_puts(m, " lspw"); + if (cpu_has_lvz) + seq_puts(m, " lvz"); + if (cpu_has_lbt_x86) + seq_puts(m, " lbt_x86"); + if (cpu_has_lbt_arm) + seq_puts(m, " lbt_arm"); + if (cpu_has_lbt_mips) + seq_puts(m, " lbt_mips"); + seq_puts(m, "\n"); seq_printf(m, "Hardware Watchpoint\t: %s", str_yes_no(cpu_has_watch)); if (cpu_has_watch) { @@ -84,7 +100,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count); } - seq_printf(m, "\n\n"); + seq_puts(m, "\n\n"); return 0; } From abca6583a2aa00ed856907d86446ae527442a754 Mon Sep 17 00:00:00 2001 From: "Lain \"Fearyncess\" Yang" Date: Tue, 10 Feb 2026 19:31:12 +0800 Subject: [PATCH 05/20] LoongArch: Wire up memfd_secret system call LoongArch supports ARCH_HAS_SET_DIRECT_MAP, therefore wire up the memfd_secret system call, which just depends on it. 
Acked-by: Arnd Bergmann Acked-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand (Red Hat) Signed-off-by: Lain "Fearyncess" Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/unistd.h | 1 + arch/loongarch/kernel/Makefile.syscalls | 5 ++--- tools/testing/selftests/mm/Makefile | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/unistd.h b/arch/loongarch/include/asm/unistd.h index e2c0f3d86c7b..e7649c158248 100644 --- a/arch/loongarch/include/asm/unistd.h +++ b/arch/loongarch/include/asm/unistd.h @@ -10,5 +10,6 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_MEMFD_SECRET #define NR_syscalls (__NR_syscalls) diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls index cd46c2b69c7f..06f160502537 100644 --- a/arch/loongarch/kernel/Makefile.syscalls +++ b/arch/loongarch/kernel/Makefile.syscalls @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# No special ABIs on loongarch so far -syscall_abis_32 += -syscall_abis_64 += +syscall_abis_32 += memfd_secret +syscall_abis_64 += memfd_secret diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index eaf9312097f7..79582438efc4 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -72,7 +72,7 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate -ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64 loongarch32 loongarch64)) TEST_GEN_FILES += memfd_secret endif TEST_GEN_FILES += migration From 94b0c831eda778ae9e4f2164a8b3de485d8977bb Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 10 Feb 2026 19:31:12 +0800 Subject: [PATCH 06/20] LoongArch: Make cpumask_of_node() robust against NUMA_NO_NODE The arch definition of cpumask_of_node() cannot handle NUMA_NO_NODE - which is a valid 
index - so add a check for this. Cc: stable@vger.kernel.org Signed-off-by: John Garry Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index f06e7ff25bb7..6b79d6183085 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -12,7 +12,7 @@ extern cpumask_t cpus_on_node[]; -#define cpumask_of_node(node) (&cpus_on_node[node]) +#define cpumask_of_node(node) ((node) == NUMA_NO_NODE ? cpu_all_mask : &cpus_on_node[node]) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); From 009ee0c96416ecd0c568af72ee37965e06bde460 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 10 Feb 2026 19:31:13 +0800 Subject: [PATCH 07/20] LoongArch: Add HOTPLUG_SMT implementation For benchmarking or debugging purpose, we usually want to control SMT via boot parameter and sysfs knobs. So add HOTPLUG_SMT implementation. 1. Boot parameters: nosmt: Disable SMT, can be enabled via sysfs knobs. nosmt=force: Disable SMT, cannot be enabled via sysfs knobs. 2. Runtime sysfs controls: Write "on", "off", "forceoff" or the number of SMT threads (1, 2, ...) to /sys/devices/system/cpu/smt/control. Signed-off-by: Huacai Chen --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/loongarch/Kconfig | 1 + arch/loongarch/kernel/smp.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index aa0031108bc1..674ad1f3bf55 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4585,7 +4585,7 @@ Kernel parameters nosmt [KNL,MIPS,PPC,EARLY] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,X86,PPC,S390] Disable symmetric multithreading (SMT). 
+ [KNL,LOONGARCH,X86,PPC,S390] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bf5a5beab366..36dbc62cb2a9 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -186,6 +186,7 @@ config LOONGARCH select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ select HAVE_VIRT_CPU_ACCOUNTING_GEN + select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_FORCED_THREADING select IRQ_LOONGARCH_CPU select LOCK_MM_AND_FIND_VMA diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 8b2fcb3fb874..64a048f1b880 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -365,16 +365,29 @@ void __init loongson_smp_setup(void) void __init loongson_prepare_cpus(unsigned int max_cpus) { int i = 0; + int threads_per_core = 0; parse_acpi_topology(); cpu_data[0].global_id = cpu_logical_map(0); + if (!pptt_enabled) + threads_per_core = 1; + else { + for_each_possible_cpu(i) { + if (cpu_to_node(i) != 0) + continue; + if (cpus_are_siblings(0, i)) + threads_per_core++; + } + } + for (i = 0; i < loongson_sysconf.nr_cpus; i++) { set_cpu_present(i, true); csr_mail_send(0, __cpu_logical_map[i], 0); } per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + cpu_smt_set_num_threads(threads_per_core, threads_per_core); } /* From 2172d6ebac9372eb01fe4505a53e18cb061e103b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 10 Feb 2026 19:31:13 +0800 Subject: [PATCH 08/20] LoongArch: Prefer top-down allocation after arch_mem_init() Currently we use bottom-up allocation after sparse_init(), the reason is sparse_init() needs a lot of memory, and bottom-up allocation may exhaust precious low memory (below 4GB). On the other hand, SWIOTLB and CMA need low memory for DMA32, so swiotlb_init() and dma_contiguous_reserve() need bottom-up allocation. 
Since swiotlb_init() and dma_contiguous_reserve() are both called in arch_mem_init(), we no longer need bottom-up allocation after that. So we set the allocation policy to top-down at the end of arch_mem_init(), in order to avoid later memory allocations (such as KASAN) exhausting low memory. This solves at least two problems: 1. Some buggy BIOSes use 0xfd000000~0xfe000000 for secondary CPUs, but didn't reserve this range, which causes smpboot failures. 2. Some DMA32 devices, such as Loongson-DRM and OHCI, cannot work with KASAN enabled. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 20cb6f306456..2b260d15b2e2 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -421,6 +421,7 @@ static void __init arch_mem_init(char **cmdline_p) PFN_UP(__pa_symbol(&__nosave_end))); memblock_dump_all(); + memblock_set_bottom_up(false); early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); } From 77403a06d845db1caf9a6b0867b43e9dd8de8e4a Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 10 Feb 2026 19:31:13 +0800 Subject: [PATCH 09/20] LoongArch: Use %px to print unmodified unwinding address Currently, %p is used to prevent leaking information about the kernel memory layout when printing the PC address, but the kernel log messages are not useful for debugging problems if bt_address() returns 0. Given that the type of "pc" variable is unsigned long, it should use %px to print the unmodified unwinding address. 
Cc: stable@vger.kernel.org Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index 8a6e3429a860..d6b3688a1ce9 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -494,7 +494,7 @@ bool unwind_next_frame(struct unwind_state *state) state->pc = bt_address(pc); if (!state->pc) { - pr_err("cannot find unwind pc at %p\n", (void *)pc); + pr_err("cannot find unwind pc at %px\n", (void *)pc); goto err; } From 055c7e75190e0be43037bd663a3f6aced194416e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 10 Feb 2026 19:31:13 +0800 Subject: [PATCH 10/20] LoongArch: Handle percpu handler address for ORC unwinder After commit 4cd641a79e69 ("LoongArch: Remove unnecessary checks for ORC unwinder"), the system can not boot normally under some configs (such as enable KASAN), there are many error messages "cannot find unwind pc". The kernel boots normally with the defconfig, so no problem found out at the first time. Here is one way to reproduce: cd linux make mrproper defconfig -j"$(nproc)" scripts/config -e KASAN make olddefconfig all -j"$(nproc)" sudo make modules_install sudo make install sudo reboot The address that can not unwind is not a valid kernel address which is between "pcpu_handlers[cpu]" and "pcpu_handlers[cpu] + vec_sz" due to the code of eentry was copied to the new area of pcpu_handlers[cpu] in setup_tlb_handler(), handle this special case to get the valid address to unwind normally. 
Cc: stable@vger.kernel.org Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/setup.h | 3 +++ arch/loongarch/kernel/unwind_orc.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index 3c2fb16b11b6..f81375e5e89c 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -7,6 +7,7 @@ #define _LOONGARCH_SETUP_H #include +#include #include #include @@ -14,6 +15,8 @@ extern unsigned long eentry; extern unsigned long tlbrentry; +extern unsigned long pcpu_handlers[NR_CPUS]; +extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; extern char init_command_line[COMMAND_LINE_SIZE]; extern void tlb_init(int cpu); extern void cpu_cache_init(void); diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index d6b3688a1ce9..11ba3e4ac9ee 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -352,6 +352,22 @@ static inline unsigned long bt_address(unsigned long ra) { extern unsigned long eentry; +#if defined(CONFIG_NUMA) && !defined(CONFIG_PREEMPT_RT) + int cpu; + int vec_sz = sizeof(exception_handlers); + + for_each_possible_cpu(cpu) { + if (!pcpu_handlers[cpu]) + continue; + + if (ra >= pcpu_handlers[cpu] && + ra < pcpu_handlers[cpu] + vec_sz) { + ra = ra + eentry - pcpu_handlers[cpu]; + break; + } + } +#endif + if (ra >= eentry && ra < eentry + EXCCODE_INT_END * VECSIZE) { unsigned long func; unsigned long type = (ra - eentry) / VECSIZE; From 70b0faae3590c628a98a627a10e5d211310169d4 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 10 Feb 2026 19:31:13 +0800 Subject: [PATCH 11/20] LoongArch: Guard percpu handler under !CONFIG_PREEMPT_RT After commit 88fd2b70120d ("LoongArch: Fix sleeping in atomic context for PREEMPT_RT"), it should guard percpu handler under !CONFIG_PREEMPT_RT to avoid redundant operations. 
Cc: stable@vger.kernel.org Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unwind_prologue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 729e775bd40d..ee1c29686ab0 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -65,7 +65,7 @@ static inline bool scan_handlers(unsigned long entry_offset) static inline bool fix_exception(unsigned long pc) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && !defined(CONFIG_PREEMPT_RT) int cpu; for_each_possible_cpu(cpu) { From 0e6f596d6ac635e80bb265d587b2287ef8fa1cd6 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 10 Feb 2026 19:31:14 +0800 Subject: [PATCH 12/20] LoongArch: Remove some extern variables in source files There are declarations of the variable "eentry", "pcpu_handlers[]" and "exception_handlers[]" in asm/setup.h, the source files already include this header file directly or indirectly, so no need to declare them in the source files, just remove the code. 
Cc: stable@vger.kernel.org Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unwind_orc.c | 2 -- arch/loongarch/kernel/unwind_prologue.c | 4 ---- arch/loongarch/mm/tlb.c | 1 - 3 files changed, 7 deletions(-) diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index 11ba3e4ac9ee..9cfb5bb1991f 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -350,8 +350,6 @@ EXPORT_SYMBOL_GPL(unwind_start); static inline unsigned long bt_address(unsigned long ra) { - extern unsigned long eentry; - #if defined(CONFIG_NUMA) && !defined(CONFIG_PREEMPT_RT) int cpu; int vec_sz = sizeof(exception_handlers); diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index ee1c29686ab0..da07acad7973 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -23,10 +23,6 @@ extern const int unwind_hint_lasx; extern const int unwind_hint_lbt; extern const int unwind_hint_ri; extern const int unwind_hint_watch; -extern unsigned long eentry; -#ifdef CONFIG_NUMA -extern unsigned long pcpu_handlers[NR_CPUS]; -#endif static inline bool scan_handlers(unsigned long entry_offset) { diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 6a3c91b9cacd..4014c4469587 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -262,7 +262,6 @@ static void output_pgtable_bits_defines(void) #ifdef CONFIG_NUMA unsigned long pcpu_handlers[NR_CPUS]; #endif -extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; static void setup_tlb_handler(int cpu) { From 7cb37af61f09c9cfd90c43c9275307c16320cbf2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 10 Feb 2026 19:31:17 +0800 Subject: [PATCH 13/20] LoongArch: Disable instrumentation for setup_ptwalker() According to Documentation/dev-tools/kasan.rst, software KASAN modes use compiler instrumentation to insert validity checks. 
Such instrumentation might be incompatible with some parts of the kernel, and therefore needs to be disabled; just use the attribute __no_sanitize_address to disable instrumentation for the low level function setup_ptwalker(). Otherwise bringing up the secondary CPUs fails when CONFIG_KASAN is set (especially when PTW is enabled), here are the call chains: smpboot_entry() start_secondary() cpu_probe() per_cpu_trap_init() tlb_init() setup_tlb_handler() setup_ptwalker() The reason is the PGD registers are configured in setup_ptwalker(), but KASAN instrumentation may cause TLB exceptions before that. Cc: stable@vger.kernel.org Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 4014c4469587..aaf7d685cc2a 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -202,7 +202,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep local_irq_restore(flags); } -static void setup_ptwalker(void) +static void __no_sanitize_address setup_ptwalker(void) { unsigned long pwctl0, pwctl1; unsigned long pgd_i = 0, pgd_w = 0; From 5ec5ac4ca27e4daa234540ac32f9fc5219377d53 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 10 Feb 2026 19:31:17 +0800 Subject: [PATCH 14/20] LoongArch: Rework KASAN initialization for PTW-enabled systems kasan_init_generic() indicates that kasan is fully initialized, so it should be put at the end of kasan_init(). Otherwise bringing up the primary CPU fails when CONFIG_KASAN is set on PTW-enabled systems, here are the call chains: kernel_entry() start_kernel() setup_arch() kasan_init() kasan_init_generic() The reason is PTW-enabled systems have speculative accesses which means memory accesses to the shadow memory after kasan_init() may be executed by hardware before. 
However, accessing shadow memory is safe only after kasan fully initialized because kasan_init() uses a temporary PGD table until we have populated all levels of shadow page tables and written the PGD register. Moving kasan_init_generic() later can defer the occasion of kasan_enabled(), so as to avoid speculative accesses on shadow pages. After moving kasan_init_generic() to the end, kasan_init() can no longer call kasan_mem_to_shadow() for shadow address conversion because it will always return kasan_early_shadow_page. On the other hand, we should keep the current logic of kasan_mem_to_shadow() for both the early and final stage because there may be instrumentation before kasan_init(). To solve this, we factor out a new mem_to_shadow() function from current kasan_mem_to_shadow() for the shadow address conversion in kasan_init(). Cc: stable@vger.kernel.org Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/mm/kasan_init.c | 80 +++++++++++++++++----------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index 170da98ad4f5..0fc02ca06457 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -40,39 +40,43 @@ static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); #define __pte_none(early, pte) (early ? 
pte_none(pte) : \ ((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page))) +static void *mem_to_shadow(const void *addr) +{ + unsigned long offset = 0; + unsigned long maddr = (unsigned long)addr; + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + + if (maddr >= FIXADDR_START) + return (void *)(kasan_early_shadow_page); + + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: + offset = XKPRANGE_CC_SHADOW_OFFSET; + break; + case XKPRANGE_UC_SEG: + offset = XKPRANGE_UC_SHADOW_OFFSET; + break; + case XKPRANGE_WC_SEG: + offset = XKPRANGE_WC_SHADOW_OFFSET; + break; + case XKVRANGE_VC_SEG: + offset = XKVRANGE_VC_SHADOW_OFFSET; + break; + default: + WARN_ON(1); + return NULL; + } + + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); +} + void *kasan_mem_to_shadow(const void *addr) { - if (!kasan_enabled()) { + if (kasan_enabled()) + return mem_to_shadow(addr); + else return (void *)(kasan_early_shadow_page); - } else { - unsigned long maddr = (unsigned long)addr; - unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; - unsigned long offset = 0; - - if (maddr >= FIXADDR_START) - return (void *)(kasan_early_shadow_page); - - maddr &= XRANGE_SHADOW_MASK; - switch (xrange) { - case XKPRANGE_CC_SEG: - offset = XKPRANGE_CC_SHADOW_OFFSET; - break; - case XKPRANGE_UC_SEG: - offset = XKPRANGE_UC_SHADOW_OFFSET; - break; - case XKPRANGE_WC_SEG: - offset = XKPRANGE_WC_SHADOW_OFFSET; - break; - case XKVRANGE_VC_SEG: - offset = XKVRANGE_VC_SHADOW_OFFSET; - break; - default: - WARN_ON(1); - return NULL; - } - - return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); - } } const void *kasan_shadow_to_mem(const void *shadow_addr) @@ -293,11 +297,8 @@ void __init kasan_init(void) /* Maps everything to a single page of zeroes */ kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); - kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), - kasan_mem_to_shadow((void 
*)KFENCE_AREA_END)); - - /* Enable KASAN here before kasan_mem_to_shadow(). */ - kasan_init_generic(); + kasan_populate_early_shadow(mem_to_shadow((void *)VMALLOC_START), + mem_to_shadow((void *)KFENCE_AREA_END)); /* Populate the linear mapping */ for_each_mem_range(i, &pa_start, &pa_end) { @@ -307,13 +308,13 @@ void __init kasan_init(void) if (start >= end) break; - kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), - (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE); + kasan_map_populate((unsigned long)mem_to_shadow(start), + (unsigned long)mem_to_shadow(end), NUMA_NO_NODE); } /* Populate modules mapping */ - kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR), - (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE); + kasan_map_populate((unsigned long)mem_to_shadow((void *)MODULES_VADDR), + (unsigned long)mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE); /* * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we * should make sure that it maps the zero page read-only. @@ -328,4 +329,5 @@ void __init kasan_init(void) /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; + kasan_init_generic(); } From f5db714646c0a90842f7c09cda72b7844a46a179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20L=C3=B3pez?= Date: Tue, 10 Feb 2026 19:31:17 +0800 Subject: [PATCH 15/20] LoongArch: Use IS_ERR_PCPU() macro for KGDB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit a759e37fb467 ("err.h: add ERR_PTR_PCPU(), PTR_ERR_PCPU() and IS_ERR_PCPU() macros"), specialized macros were added to check an error within a __percpu pointer, so use them instead of manually casting with __force, like all other users of register_wide_hw_breakpoint(). 
Signed-off-by: Carlos López Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kgdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c index 7be5b4c0c900..17664a6043b1 100644 --- a/arch/loongarch/kernel/kgdb.c +++ b/arch/loongarch/kernel/kgdb.c @@ -697,7 +697,7 @@ void kgdb_arch_late(void) continue; breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); - if (IS_ERR((void * __force)breakinfo[i].pev)) { + if (IS_ERR_PCPU(breakinfo[i].pev)) { pr_err("kgdb: Could not allocate hw breakpoints.\n"); breakinfo[i].pev = NULL; return; From 4ab17e762b34c847478f694932c4cd4b1ac2c343 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Tue, 10 Feb 2026 19:31:17 +0800 Subject: [PATCH 16/20] LoongArch: BPF: Use BPF prog pack allocator Use bpf_jit_binary_pack_alloc() for BPF JIT binaries. The BPF prog pack allocator creates a pair of RW and RX buffers. The BPF JIT writes the program into the RW buffer. When the JIT is done, the program is copied to the final RX buffer with bpf_jit_binary_pack_finalize(). 
Acked-by: Tiezhu Yang Tested-by: Vincent Li Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 118 ++++++++++++++++++++++++----------- arch/loongarch/net/bpf_jit.h | 2 +- 2 files changed, 83 insertions(+), 37 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index d1d5a65308b9..6c041e0ec4d7 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -461,10 +461,10 @@ static int add_exception_handler(const struct bpf_insn *insn, int dst_reg) { unsigned long pc; - off_t offset; + off_t ins_offset, fixup_offset; struct exception_table_entry *ex; - if (!ctx->image || !ctx->prog->aux->extable) + if (!ctx->image || !ctx->ro_image || !ctx->prog->aux->extable) return 0; if (BPF_MODE(insn->code) != BPF_PROBE_MEM && @@ -475,14 +475,18 @@ static int add_exception_handler(const struct bpf_insn *insn, return -EINVAL; ex = &ctx->prog->aux->extable[ctx->num_exentries]; - pc = (unsigned long)&ctx->image[ctx->idx - 1]; + pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; - offset = pc - (long)&ex->insn; - if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + /* + * This is the relative offset of the instruction that may fault from + * the exception table itself. This will be written to the exception + * table and if this instruction faults, the destination register will + * be set to '0' and the execution will jump to the next instruction. + */ + ins_offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) return -ERANGE; - ex->insn = offset; - /* * Since the extable follows the program, the fixup offset is always * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value @@ -490,13 +494,23 @@ static int add_exception_handler(const struct bpf_insn *insn, * bits. We don't need to worry about buildtime or runtime sort * modifying the upper bits because the table is already sorted, and * isn't part of the main exception table. 
+ * + * The fixup_offset is set to the next instruction from the instruction + * that may fault. The execution will jump to this after handling the fault. */ - offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE); - if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + fixup_offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) return -ERANGE; + /* + * The offsets above have been calculated using the RO buffer but we + * need to use the R/W buffer for writes. Switch ex to rw buffer for writing. + */ + ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); + ex->insn = ins_offset; + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | + FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); ex->type = EX_TYPE_BPF; - ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); ctx->num_exentries++; @@ -1829,11 +1843,12 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; - u8 *image_ptr; + u8 *image_ptr, *ro_image_ptr; int image_size, prog_size, extable_size; struct jit_ctx ctx; struct jit_data *jit_data; struct bpf_binary_header *header; + struct bpf_binary_header *ro_header; struct bpf_prog *tmp, *orig_prog = prog; /* @@ -1868,8 +1883,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } if (jit_data->ctx.offset) { ctx = jit_data->ctx; - image_ptr = jit_data->image; + ro_header = jit_data->ro_header; + ro_image_ptr = (void *)ctx.ro_image; header = jit_data->header; + image_ptr = (void *)header + ((void *)ro_image_ptr - (void *)ro_header); extra_pass = true; prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; @@ -1903,17 +1920,25 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog_size = sizeof(u32) * ctx.idx; image_size = prog_size + extable_size; /* Now we know the size of the structure to make */ - header = 
bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) { + ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32), + &header, &image_ptr, jit_fill_hole); + if (!ro_header) { prog = orig_prog; goto out_offset; } /* 2. Now, the actual pass to generate final JIT code */ + /* + * Use the image (RW) for writing the JITed instructions. But also save + * the ro_image (RX) for calculating the offsets in the image. The RW + * image will be later copied to the RX image from where the program will + * run. The bpf_jit_binary_pack_finalize() will do this copy in the final + * step. + */ ctx.image = (union loongarch_instruction *)image_ptr; + ctx.ro_image = (union loongarch_instruction *)ro_image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + prog_size; + prog->aux->extable = (void *)ro_image_ptr + prog_size; skip_init_ctx: ctx.idx = 0; @@ -1921,48 +1946,47 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) build_prologue(&ctx); if (build_body(&ctx, extra_pass)) { - bpf_jit_binary_free(header); prog = orig_prog; - goto out_offset; + goto out_free; } build_epilogue(&ctx); /* 3. 
Extra pass to validate JITed code */ if (validate_ctx(&ctx)) { - bpf_jit_binary_free(header); prog = orig_prog; - goto out_offset; + goto out_free; } /* And we're done */ if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, prog_size, 2, ctx.image); - /* Update the icache */ - flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx)); - if (!prog->is_func || extra_pass) { - int err; - if (extra_pass && ctx.idx != jit_data->ctx.idx) { pr_err_once("multi-func JIT bug %d != %d\n", ctx.idx, jit_data->ctx.idx); goto out_free; } - err = bpf_jit_binary_lock_ro(header); - if (err) { - pr_err_once("bpf_jit_binary_lock_ro() returned %d\n", - err); + if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { + /* ro_header has been freed */ + ro_header = NULL; + prog = orig_prog; goto out_free; } + /* + * The instructions have now been copied to the ROX region from + * where they will execute. Now the data cache has to be cleaned + * to the PoU and the I-cache has to be invalidated for the VAs. + */ + bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); } else { jit_data->ctx = ctx; - jit_data->image = image_ptr; jit_data->header = header; + jit_data->ro_header = ro_header; } prog->jited = 1; prog->jited_len = prog_size; - prog->bpf_func = (void *)ctx.image; + prog->bpf_func = (void *)ctx.ro_image; if (!prog->is_func || extra_pass) { int i; @@ -1982,17 +2006,39 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (tmp_blinded) bpf_jit_prog_release_other(prog, prog == orig_prog ? 
tmp : orig_prog); - return prog; out_free: - bpf_jit_binary_free(header); - prog->bpf_func = NULL; - prog->jited = 0; - prog->jited_len = 0; + if (header) { + bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size)); + bpf_jit_binary_pack_free(ro_header, header); + } goto out_offset; } +void bpf_jit_free(struct bpf_prog *prog) +{ + if (prog->jited) { + struct jit_data *jit_data = prog->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), the + * program may not be finalized yet. Call finalize here before + * freeing it. + */ + if (jit_data) { + bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(prog); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); + } + + bpf_prog_unlock_free(prog); +} + bool bpf_jit_bypass_spec_v1(void) { return true; diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index 75b6330030a9..38eb75f79069 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -24,7 +24,7 @@ struct jit_ctx { struct jit_data { struct bpf_binary_header *header; - u8 *image; + struct bpf_binary_header *ro_header; struct jit_ctx ctx; }; From ef54c517a9376b188da06b5e1ed556129c4280be Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Tue, 10 Feb 2026 19:31:18 +0800 Subject: [PATCH 17/20] LoongArch: BPF: Implement PROBE_MEM32 pseudo instructions Add support for `{LDX,STX,ST} | PROBE_MEM32 | {B,H,W,DW}` instructions. They are similar to PROBE_MEM instructions with the following differences: * PROBE_MEM32 supports store. * PROBE_MEM32 relies on the verifier to clear upper 32-bit of the src/dst register * PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in S6 in the prologue). Due to bpf_arena constructions such S6 + reg + off16 access is guaranteed to be within arena virtual range, so no address check at run-time. 
* S6 is a free callee-saved register, so it is used to store arena_vm_start * PROBE_MEM32 allows ST and STX. If they fault the store is a nop. When LDX faults the destination register is zeroed. To support these on LoongArch, we employ the t2/t3 registers to store the intermediate results of reg_arena + src/dst reg and use the t2/t3 registers as the new src/dst reg. This allows us to reuse most of the existing code. Acked-by: Tiezhu Yang Tested-by: Vincent Li Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 73 +++++++++++++++++++++++++++++++++--- arch/loongarch/net/bpf_jit.h | 1 + 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 6c041e0ec4d7..b20ecc326a38 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -17,6 +17,7 @@ #define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) #define REG_TCC LOONGARCH_GPR_A6 +#define REG_ARENA LOONGARCH_GPR_S6 /* For storing arena_vm_start */ #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80) static const int regmap[] = { @@ -136,6 +137,9 @@ static void build_prologue(struct jit_ctx *ctx) /* To store tcc and tcc_ptr */ stack_adjust += sizeof(long) * 2; + if (ctx->arena_vm_start) + stack_adjust += 8; + stack_adjust = round_up(stack_adjust, 16); stack_adjust += bpf_stack_adjust; @@ -178,6 +182,11 @@ static void build_prologue(struct jit_ctx *ctx) store_offset -= sizeof(long); emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); + if (ctx->arena_vm_start) { + store_offset -= sizeof(long); + emit_insn(ctx, std, REG_ARENA, LOONGARCH_GPR_SP, store_offset); + } + prepare_bpf_tail_call_cnt(ctx, &store_offset); emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); @@ -186,6 +195,9 @@ static void build_prologue(struct jit_ctx *ctx) emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); ctx->stack_size 
= stack_adjust; + + if (ctx->arena_vm_start) + move_imm(ctx, REG_ARENA, ctx->arena_vm_start, false); } static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) @@ -217,6 +229,11 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) load_offset -= sizeof(long); emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); + if (ctx->arena_vm_start) { + load_offset -= sizeof(long); + emit_insn(ctx, ldd, REG_ARENA, LOONGARCH_GPR_SP, load_offset); + } + /* * When push into the stack, follow the order of tcc then tcc_ptr. * When pop from the stack, first pop tcc_ptr then followed by tcc. @@ -442,6 +459,7 @@ static bool is_signed_bpf_cond(u8 cond) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) +#define REG_DONT_CLEAR_MARKER 0 bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) @@ -449,7 +467,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); - regs->regs[dst_reg] = 0; + if (dst_reg != REG_DONT_CLEAR_MARKER) + regs->regs[dst_reg] = 0; regs->csr_era = (unsigned long)&ex->fixup - offset; return true; @@ -468,7 +487,8 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; if (BPF_MODE(insn->code) != BPF_PROBE_MEM && - BPF_MODE(insn->code) != BPF_PROBE_MEMSX) + BPF_MODE(insn->code) != BPF_PROBE_MEMSX && + BPF_MODE(insn->code) != BPF_PROBE_MEM32) return 0; if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) @@ -528,8 +548,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext const u8 cond = BPF_OP(code); const u8 t1 = LOONGARCH_GPR_T1; const u8 t2 = LOONGARCH_GPR_T2; - const u8 src = regmap[insn->src_reg]; - const u8 dst = regmap[insn->dst_reg]; + const u8 t3 = LOONGARCH_GPR_T3; + u8 src = regmap[insn->src_reg]; + u8 dst = regmap[insn->dst_reg]; const s16 off = insn->off; 
const s32 imm = insn->imm; const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32; @@ -1035,8 +1056,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: - sign_extend = BPF_MODE(insn->code) == BPF_MEMSX || - BPF_MODE(insn->code) == BPF_PROBE_MEMSX; + /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + REG_ARENA + off) */ + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: + sign_extend = BPF_MODE(code) == BPF_MEMSX || + BPF_MODE(code) == BPF_PROBE_MEMSX; + + if (BPF_MODE(code) == BPF_PROBE_MEM32) { + emit_insn(ctx, addd, t2, src, REG_ARENA); + src = t2; + } + switch (BPF_SIZE(code)) { case BPF_B: if (is_signed_imm12(off)) { @@ -1096,6 +1128,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_DW: + /* ST | PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = imm */ + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(code) == BPF_PROBE_MEM32) { + emit_insn(ctx, addd, t3, dst, REG_ARENA); + dst = t3; + } + switch (BPF_SIZE(code)) { case BPF_B: move_imm(ctx, t1, imm, is32); @@ -1138,6 +1180,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER); + if (ret) + return ret; break; /* *(size *)(dst + off) = src */ @@ -1145,6 +1191,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_W: case BPF_STX | BPF_MEM | BPF_DW: + /* STX | 
PROBE_MEM32: *(size *)(dst + REG_ARENA + off) = src */ + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(code) == BPF_PROBE_MEM32) { + emit_insn(ctx, addd, t2, dst, REG_ARENA); + dst = t2; + } + switch (BPF_SIZE(code)) { case BPF_B: if (is_signed_imm12(off)) { @@ -1183,6 +1239,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER); + if (ret) + return ret; break; case BPF_STX | BPF_ATOMIC | BPF_W: @@ -1894,6 +1954,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; + ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); if (ctx.offset == NULL) { diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index 38eb75f79069..8816ac106f2f 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -20,6 +20,7 @@ struct jit_ctx { union loongarch_instruction *image; union loongarch_instruction *ro_image; u32 stack_size; + u64 arena_vm_start; }; struct jit_data { From 4fdb5dd8aeba3a6b5ffc9c66fd0c8528fd835065 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Tue, 10 Feb 2026 19:31:18 +0800 Subject: [PATCH 18/20] LoongArch: BPF: Implement bpf_addr_space_cast instruction LLVM generates bpf_addr_space_cast instruction while translating pointers between native (zero) address space and __attribute__((address_space(N))). The addr_space=1 is reserved as bpf_arena address space. rY = addr_space_cast(rX, 0, 1) is processed by the verifier and converted to normal 32-bit move: wX = wY rY = addr_space_cast(rX, 1, 0) has to be converted by JIT. 
With this, the following test cases passed: $ ./test_progs -a arena_htab,arena_list,arena_strsearch,verifier_arena,verifier_arena_large #4/1 arena_htab/arena_htab_llvm:OK #4/2 arena_htab/arena_htab_asm:OK #4 arena_htab:OK #5/1 arena_list/arena_list_1:OK #5/2 arena_list/arena_list_1000:OK #5 arena_list:OK #7/1 arena_strsearch/arena_strsearch:OK #7 arena_strsearch:OK #507/1 verifier_arena/basic_alloc1:OK #507/2 verifier_arena/basic_alloc2:OK #507/3 verifier_arena/basic_alloc3:OK #507/4 verifier_arena/basic_reserve1:OK #507/5 verifier_arena/basic_reserve2:OK #507/6 verifier_arena/reserve_twice:OK #507/7 verifier_arena/reserve_invalid_region:OK #507/8 verifier_arena/iter_maps1:OK #507/9 verifier_arena/iter_maps2:OK #507/10 verifier_arena/iter_maps3:OK #507 verifier_arena:OK #508/1 verifier_arena_large/big_alloc1:OK #508/2 verifier_arena_large/access_reserved:OK #508/3 verifier_arena_large/request_partially_reserved:OK #508/4 verifier_arena_large/free_reserved:OK #508/5 verifier_arena_large/big_alloc2:OK #508 verifier_arena_large:OK Summary: 5/20 PASSED, 0 SKIPPED, 0 FAILED Acked-by: Tiezhu Yang Tested-by: Vincent Li Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 15 +++++++++++++++ arch/loongarch/net/bpf_jit.h | 1 + 2 files changed, 16 insertions(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index b20ecc326a38..ceb7939088cd 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -559,6 +559,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (insn_is_cast_user(insn)) { + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, true); + move_imm(ctx, dst, (ctx->user_vm_start >> 32) << 32, false); + emit_insn(ctx, beq, t1, LOONGARCH_GPR_ZERO, 1); + emit_insn(ctx, or, t1, dst, t1); + move_reg(ctx, dst, t1); + break; + } switch (off) { case 0: move_reg(ctx, dst, src); 
@@ -1955,6 +1964,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); + ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); if (ctx.offset == NULL) { @@ -2110,6 +2120,11 @@ bool bpf_jit_bypass_spec_v4(void) return true; } +bool bpf_jit_supports_arena(void) +{ + return true; +} + /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ bool bpf_jit_supports_subprog_tailcalls(void) { diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h index 8816ac106f2f..a8e29be35fa8 100644 --- a/arch/loongarch/net/bpf_jit.h +++ b/arch/loongarch/net/bpf_jit.h @@ -21,6 +21,7 @@ struct jit_ctx { union loongarch_instruction *ro_image; u32 stack_size; u64 arena_vm_start; + u64 user_vm_start; }; struct jit_data { From 4fd5ca0f2c626f59f131c62df1dba9ccf39f074d Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 10 Feb 2026 19:31:21 +0800 Subject: [PATCH 19/20] LoongArch: dts: loongson-2k0500: Add nand controller support The module is supported, enable it. 
Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- .../boot/dts/loongson-2k0500-ref.dts | 19 +++++++++++++++++++ arch/loongarch/boot/dts/loongson-2k0500.dtsi | 12 +++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts index 018ed904352a..7ace54c84244 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts @@ -41,6 +41,25 @@ linux,cma { }; }; +&apbdma0 { + status = "okay"; +}; + +&nand { + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + nand@0 { + reg = <0>; + label = "ls2k0500-nand"; + nand-use-soft-ecc-engine; + nand-ecc-algo = "bch"; + nand-ecc-strength = <8>; + nand-ecc-step-size = <512>; + }; +}; + &apbdma3 { status = "okay"; }; diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi index e759fae77dcf..1b502064df11 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi @@ -84,7 +84,7 @@ clk: clock-controller@1fe10400 { clock-names = "ref_100m"; }; - dma-controller@1fe10c00 { + apbdma0: dma-controller@1fe10c00 { compatible = "loongson,ls2k0500-apbdma", "loongson,ls2k1000-apbdma"; reg = <0 0x1fe10c00 0 0x8>; interrupt-parent = <&eiointc>; @@ -172,6 +172,16 @@ eiointc: interrupt-controller@1fe11600 { interrupts = <3>; }; + nand: nand-controller@1ff58000 { + compatible = "loongson,ls2k0500-nand-controller"; + reg = <0 0x1ff58000 0 0x24>, + <0 0x1ff58040 0 0x4>; + reg-names = "nand", "nand-dma"; + dmas = <&apbdma0 0>; + dma-names = "rxtx"; + status = "disabled"; + }; + pwm@1ff5c000 { compatible = "loongson,ls2k0500-pwm", "loongson,ls7a-pwm"; reg = <0x0 0x1ff5c000 0x0 0x10>; From 92860256402cce9fa6268763365f5333a56c1428 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 10 Feb 2026 19:31:33 +0800 Subject: [PATCH 20/20] LoongArch: dts: loongson-2k1000: Add nand 
controller support The module is supported, enable it. Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- .../boot/dts/loongson-2k1000-ref.dts | 22 +++++++++++++++++++ arch/loongarch/boot/dts/loongson-2k1000.dtsi | 13 ++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts index d9a452ada5d7..51b8e53cb608 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -48,6 +48,28 @@ fan0: pwm-fan { }; }; +&apbdma0 { + status = "okay"; +}; + +&nand { + status = "okay"; + + pinctrl-0 = <&nand_pins_default>; + pinctrl-names = "default"; + + #address-cells = <1>; + #size-cells = <0>; + nand@0 { + reg = <0>; + label = "ls2k1000-nand"; + nand-use-soft-ecc-engine; + nand-ecc-algo = "bch"; + nand-ecc-strength = <8>; + nand-ecc-step-size = <512>; + }; +}; + &apbdma1 { status = "okay"; }; diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index be4f7d119660..ab6a55937e9e 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -248,7 +248,7 @@ tsensor: thermal-sensor@1fe01500 { #thermal-sensor-cells = <1>; }; - dma-controller@1fe00c00 { + apbdma0: dma-controller@1fe00c00 { compatible = "loongson,ls2k1000-apbdma"; reg = <0x0 0x1fe00c00 0x0 0x8>; interrupt-parent = <&liointc1>; @@ -364,6 +364,17 @@ pwm@1fe22030 { status = "disabled"; }; + nand: nand-controller@1fe26000 { + compatible = "loongson,ls2k1000-nand-controller"; + reg = <0 0x1fe26000 0 0x24>, + <0 0x1fe26040 0 0x4>, + <0 0x1fe00438 0 0x8>; + reg-names = "nand", "nand-dma", "dma-config"; + dmas = <&apbdma0 0>; + dma-names = "rxtx"; + status = "disabled"; + }; + pmc: power-management@1fe27000 { compatible = "loongson,ls2k1000-pmc", "loongson,ls2k0500-pmc", "syscon"; reg = <0x0 0x1fe27000 0x0 0x58>;