From ea1dcca1de129dfdf145338a868648bc0e24717c Mon Sep 17 00:00:00 2001 From: Tor Vic Date: Fri, 21 Mar 2025 15:28:58 +0100 Subject: [PATCH 1/3] x86/kbuild/64: Add the CONFIG_X86_NATIVE_CPU option to locally optimize the kernel with '-march=native' Add a 'native' option that allows users to build an optimized kernel for their local machine (i.e. the machine which is used to build the kernel) by passing '-march=native' to CFLAGS. The idea comes from Linus' reply to Arnd's initial proposal: https://lore.kernel.org/all/CAHk-=wji1sV93yKbc==Z7OSSHBiDE=LAdG_d5Y-zPBrnSs0k2A@mail.gmail.com/ Here are some numbers comparing 'generic' to 'native' on a Skylake dual-core laptop (generic --> native): - vmlinux and compressed modules size: 125'907'744 bytes --> 125'595'280 bytes (-0.248 %) 18'810 kilobytes --> 18'770 kilobytes (-0.213 %) - phoronix, average of 3 runs: ffmpeg: 130.99 --> 131.15 (+0.122 %) nginx: 10'650 --> 10'725 (+0.704 %) hackbench (lower is better): 102.27 --> 99.50 (-2.709 %) - xz compression of firefox tarball (lower is better): 319.57 seconds --> 317.34 seconds (-0.698 %) - stress-ng, bogoops, average of 3 15-second runs: fork: 111'744 --> 115'509 (+3.397 %) bsearch: 7'211 --> 7'436 (+3.120 %) memfd: 3'591 --> 3'604 (+0.362 %) mmapfork: 630 --> 629 (-0.159 %) schedmix: 42'715 --> 43'251 (+1.255 %) epoll: 2'443'767 --> 2'454'413 (+0.436 %) vm: 1'442'256 --> 1'486'615 (+3.076 %) - schbench (two message threads), 30-second runs: 304 rps --> 305 rps (+0.329 %) There is little difference both in terms of size and of performance, however the native build comes out on top ever so slightly. [ mingo: Renamed the option to CONFIG_X86_NATIVE_CPU, expanded the help text and added Linus's Suggested-by tag. ] Suggested-by: Linus Torvalds Signed-off-by: Tor Vic Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Kees Cook Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250321142859.13889-1-torvic9@mailbox.org --- arch/x86/Kconfig.cpu | 14 ++++++++++++++ arch/x86/Makefile | 5 +++++ 2 files changed, 19 insertions(+) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 753b8763abae..9d108a54c30a 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -245,6 +245,20 @@ config MATOM endchoice +config X86_NATIVE_CPU + bool "Build and optimize for local/native CPU" + depends on X86_64 + default n + help + Optimize for the current CPU used to compile the kernel. + Use this option if you intend to build the kernel for your + local machine. + + Note that such a kernel might not work optimally on a + different x86 machine. + + If unsure, say N. + config X86_GENERIC bool "Generic x86 support" depends on X86_32 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 0fc7e8fd1a2e..436635e005e5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -173,8 +173,13 @@ else # Use -mskip-rax-setup if supported. KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) +ifdef CONFIG_X86_NATIVE_CPU + KBUILD_CFLAGS += -march=native + KBUILD_RUSTFLAGS += -Ctarget-cpu=native +else KBUILD_CFLAGS += -march=x86-64 -mtune=generic KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic +endif KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel From 01412081863aa81db47423d5719f2726d2a00a32 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 24 Mar 2025 08:05:19 +0100 Subject: [PATCH 2/3] x86/kbuild/64: Test for the availability of the -mtune=native compiler flag Stephen reported this build failure when cross-compiling: cc1: error: bad value 'native' for '-march=' switch Test for the availability of the -march=native flag. Reported-by: Stephen Rothwell Signed-off-by: Ingo Molnar Tested-by: Stephen Rothwell # build test Cc: Tor Vic Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Kees Cook Cc: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324172723.49fb0416@canb.auug.org.au --- arch/x86/Kconfig.cpu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 9d108a54c30a..87bede96e800 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -245,10 +245,14 @@ config MATOM endchoice +config CC_HAS_MARCH_NATIVE + # This flag might not be available in cross-compilers: + def_bool $(cc-option, -march=native) + config X86_NATIVE_CPU bool "Build and optimize for local/native CPU" depends on X86_64 - default n + depends on CC_HAS_MARCH_NATIVE help Optimize for the current CPU used to compile the kernel. Use this option if you intend to build the kernel for your From ad9b861824ac55d81431815fffaaff5e981badc1 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Mar 2025 20:23:00 -0700 Subject: [PATCH 3/3] x86/kbuild/64: Restrict clang versions that can use '-march=native' There are two issues that can appear when building with clang and '-march=native'. The first issue is a compiler crash in the ipv6 stack with clang-18, such as when building allmodconfig. This was frequently reported on the LLVM issue tracker: # Link: https://github.com/llvm/llvm-project/issues?q=is%3Aissue%20ip6_rcv_core Stack dump: 0. Program arguments: clang ... -march=native ... net/ipv6/ip6_input.c 1. parser at end of file 2. Code generation 3. Running pass 'Function Pass Manager' on module 'net/ipv6/ip6_input.c'. 4. Running pass 'X86 DAG->DAG Instruction Selection' on function '@ip6_rcv_core' The second issue is certain -Wframe-larger-than warnings that appear with clang versions prior to 19, which introduced an optimization that produces much better code: # Link: https://github.com/llvm/llvm-project/commit/90ba33099cbb17e7c159e9ebc5a512037db99d6d [2] clang-18: drivers/media/pci/saa7164/saa7164-core.c:605:20: error: stack frame size (2392) exceeds limit (2048) in 'saa7164_irq' [-Werror,-Wframe-larger-than] 605 | static irqreturn_t saa7164_irq(int irq, void *dev_id) | ^ clang-19: drivers/media/pci/saa7164/saa7164-core.c:605:20: error: stack frame size (216) exceeds limit (128) in 'saa7164_irq' [-Werror,-Wframe-larger-than] 605 | static irqreturn_t saa7164_irq(int irq, void *dev_id) | ^ Restrict the testing of the availability of '-march=native' to all supported GCC versions and clang-19 and newer to avoid these known resolved issues. Fixes: 0480bc7e65dc ("x86/kbuild/64: Add the CONFIG_X86_NATIVE_CPU option to locally optimize the kernel with '-march=native'") Signed-off-by: Nathan Chancellor Signed-off-by: Ingo Molnar Cc: Nick Desaulniers Cc: Tor Vic Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Kees Cook Cc: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324-x86-march-native-clang-19-and-newer-v1-1-3a05ed32a89e@kernel.org --- arch/x86/Kconfig.cpu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 87bede96e800..f928cf6e3252 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -248,6 +248,12 @@ endchoice config CC_HAS_MARCH_NATIVE # This flag might not be available in cross-compilers: def_bool $(cc-option, -march=native) + # LLVM 18 has an easily triggered internal compiler error in core + # networking code with '-march=native' on certain systems: + # https://github.com/llvm/llvm-project/issues/72026 + # LLVM 19 introduces an optimization that resolves some high stack + # usage warnings that only appear wth '-march=native'. + depends on CC_IS_GCC || CLANG_VERSION >= 190100 config X86_NATIVE_CPU bool "Build and optimize for local/native CPU"