From 529676cabcf4a5046d217bba2c8f3b94a3f6a10f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Nov 2025 15:45:45 +0000 Subject: [PATCH 1/3] x86/lib: Inline csum_ipv6_magic() Inline this small helper. It has been observed to consume up to 0.75%, which is significant for such a small function. This should reduce register pressure, as saddr and daddr are often back to back in memory. For instance code inlined in tcp6_gro_receive() will look like: 55a: 48 03 73 28 add 0x28(%rbx),%rsi 55e: 8b 43 70 mov 0x70(%rbx),%eax 561: 29 f8 sub %edi,%eax 563: 0f c8 bswap %eax 565: 89 c0 mov %eax,%eax 567: 48 05 00 06 00 00 add $0x600,%rax 56d: 48 03 46 08 add 0x8(%rsi),%rax 571: 48 13 46 10 adc 0x10(%rsi),%rax 575: 48 13 46 18 adc 0x18(%rsi),%rax 579: 48 13 46 20 adc 0x20(%rsi),%rax 57d: 48 83 d0 00 adc $0x0,%rax 581: 48 89 c6 mov %rax,%rsi 584: 48 c1 ee 20 shr $0x20,%rsi 588: 01 f0 add %esi,%eax 58a: 83 d0 00 adc $0x0,%eax 58d: 89 c6 mov %eax,%esi 58f: 66 31 c0 xor %ax,%ax Surprisingly, this inlining does not seem to bloat kernel text size. In at least two cases[1], it either has no effect or results in a slightly smaller kernel. 1. 
https://lore.kernel.org/all/CANn89iJzcb_XO9oCApKYfRxsMMmg7BHukRDqWTca3ZLQ8HT0iQ@mail.gmail.com/ [ dhansen: add justification and note about lack of kernel bloat ] Signed-off-by: Eric Dumazet Signed-off-by: Dave Hansen Acked-by: Dave Hansen Link: https://patch.msgid.link/20251113154545.594580-1-edumazet@google.com --- arch/x86/include/asm/checksum_64.h | 47 +++++++++++++++++++++--------- arch/x86/lib/csum-wrappers_64.c | 22 -------------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index 4d4a47a3a8ab..5bdfd2db2b5a 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -9,6 +9,7 @@ */ #include +#include #include /** @@ -145,6 +146,17 @@ extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); */ extern __sum16 ip_compute_csum(const void *buff, int len); +static inline unsigned add32_with_carry(unsigned a, unsigned b) +{ + asm("addl %2,%0\n\t" + "adcl $0,%0" + : "=r" (a) + : "0" (a), "rm" (b)); + return a; +} + +#define _HAVE_ARCH_IPV6_CSUM 1 + /** * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header. * @saddr: source address @@ -158,20 +170,29 @@ extern __sum16 ip_compute_csum(const void *buff, int len); * Returns the unfolded 32bit checksum. 
*/ -struct in6_addr; - -#define _HAVE_ARCH_IPV6_CSUM 1 -extern __sum16 -csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, - __u32 len, __u8 proto, __wsum sum); - -static inline unsigned add32_with_carry(unsigned a, unsigned b) +static inline __sum16 csum_ipv6_magic( + const struct in6_addr *_saddr, const struct in6_addr *_daddr, + __u32 len, __u8 proto, __wsum sum) { - asm("addl %2,%0\n\t" - "adcl $0,%0" - : "=r" (a) - : "0" (a), "rm" (b)); - return a; + const unsigned long *saddr = (const unsigned long *)_saddr; + const unsigned long *daddr = (const unsigned long *)_daddr; + __u64 sum64; + + sum64 = (__force __u64)htonl(len) + (__force __u64)htons(proto) + + (__force __u64)sum; + + asm(" addq %1,%[sum64]\n" + " adcq %2,%[sum64]\n" + " adcq %3,%[sum64]\n" + " adcq %4,%[sum64]\n" + " adcq $0,%[sum64]\n" + + : [sum64] "+r" (sum64) + : "m" (saddr[0]), "m" (saddr[1]), + "m" (daddr[0]), "m" (daddr[1])); + + return csum_fold( + (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32)); } #define HAVE_ARCH_CSUM_ADD diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index f4df4d241526..831b7110b041 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -68,25 +68,3 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) } EXPORT_SYMBOL(csum_partial_copy_nocheck); -__sum16 csum_ipv6_magic(const struct in6_addr *saddr, - const struct in6_addr *daddr, - __u32 len, __u8 proto, __wsum sum) -{ - __u64 rest, sum64; - - rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) + - (__force __u64)sum; - - asm(" addq (%[saddr]),%[sum]\n" - " adcq 8(%[saddr]),%[sum]\n" - " adcq (%[daddr]),%[sum]\n" - " adcq 8(%[daddr]),%[sum]\n" - " adcq $0,%[sum]\n" - - : [sum] "=r" (sum64) - : "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr)); - - return csum_fold( - (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32)); -} -EXPORT_SYMBOL(csum_ipv6_magic); From 
6b45ded3f714e78c20708c0f29852fba856fec0c Mon Sep 17 00:00:00 2001 From: Richard Lyu Date: Tue, 6 Jan 2026 13:28:17 +0800 Subject: [PATCH 2/3] Documentation/x86: Update IOMMU spec references to use stable identifiers Direct URLs to vendor specifications for Intel VT-d and AMD IOMMU are frequently changed by vendors, leading to broken links in the documentation. Replace the fragile URLs with persistent identifiers, providing the official document titles and IDs. This ensures users can locate the relevant specifications regardless of vendor website restructuring. Signed-off-by: Richard Lyu Signed-off-by: Dave Hansen Link: https://patch.msgid.link/20260106052815.46114-1-richard.lyu@suse.com --- Documentation/arch/x86/iommu.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/arch/x86/iommu.rst b/Documentation/arch/x86/iommu.rst index 41fbadfe2221..79c33560299b 100644 --- a/Documentation/arch/x86/iommu.rst +++ b/Documentation/arch/x86/iommu.rst @@ -2,10 +2,11 @@ x86 IOMMU Support ================= -The architecture specs can be obtained from the below locations. +The architecture specs can be obtained from the vendor websites. +Search for the following documents to obtain the latest versions: -- Intel: http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf -- AMD: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/specifications/48882_3_07_PUB.pdf +- Intel: Intel Virtualization Technology for Directed I/O Architecture Specification (ID: D51397) +- AMD: AMD I/O Virtualization Technology (IOMMU) Specification (ID: 48882) This guide gives a quick cheat sheet for some basic understanding. 
From 6b32c93560cb194e10279bd3be3c1d0fa30df3e7 Mon Sep 17 00:00:00 2001 From: Ryosuke Yasuoka Date: Wed, 24 Dec 2025 16:07:32 +0900 Subject: [PATCH 3/3] x86/traps: Print unhashed pointers on stack overflow When a stack overflow occurs, the kernel prints hashed fault address and the stack range using %p. The actual addresses are required for debugging and hashed pointers provide no useful information in this context. Use %px to print the unhashed, raw addresses. Signed-off-by: Ryosuke Yasuoka Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/20251224070735.454816-1-ryasuoka@redhat.com --- arch/x86/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bcf1dedc1d00..5a6a772e0a6c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -549,7 +549,7 @@ __visible void __noreturn handle_stack_overflow(struct pt_regs *regs, { const char *name = stack_type_name(info->type); - printk(KERN_EMERG "BUG: %s stack guard page was hit at %p (stack is %p..%p)\n", + printk(KERN_EMERG "BUG: %s stack guard page was hit at %px (stack is %px..%px)\n", name, (void *)fault_address, info->begin, info->end); die("stack guard page", regs, 0);