From a3ff53167cef2b5c6c8948246172d6f9279f037f Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 19 Mar 2024 11:40:12 +0100 Subject: [PATCH 1/6] x86/asm: Remove %P operand modifier from altinstr asm templates The "P" asm operand modifier is a x86 target-specific modifier. For x86_64, when used with a symbol reference, the "%P" modifier emits "sym" instead of "sym(%rip)". This property is currently used to prevent %RIP-relative addressing in .altinstr sections. %RIP-relative addresses are nowadays correctly handled in .altinstr sections, so remove %P operand modifier from altinstr asm templates. Also note that unlike GCC, clang emits %rip-relative symbol reference with "P" asm operand modifier, so the patch also unifies symbol handling with both compilers. No functional changes intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Ard Biesheuvel Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20240319104418.284519-2-ubizjak@gmail.com --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/processor.h | 6 +++--- arch/x86/include/asm/special_insns.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 94ce0f7c9d3a..fa2e4244654e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); - alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, + alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, ASM_OUTPUT2("=r" (v), "=m" (*addr)), ASM_OUTPUT2("0" (v), "m" (*addr))); } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 811548f131f4..438c0c8f596a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -587,7 +587,7 @@ extern char ignore_fpu_irq; # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else -# define BASE_PREFETCH "prefetcht0 %P1" +# define BASE_PREFETCH "prefetcht0 %1" #endif /* @@ -598,7 +598,7 @@ extern char ignore_fpu_irq; */ static inline void prefetch(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchnta %P1", + alternative_input(BASE_PREFETCH, "prefetchnta %1", X86_FEATURE_XMM, "m" (*(const char *)x)); } @@ -610,7 +610,7 @@ static inline void prefetch(const void *x) */ static __always_inline void prefetchw(const void *x) { - alternative_input(BASE_PREFETCH, "prefetchw %P1", + alternative_input(BASE_PREFETCH, "prefetchw %1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 2e9fc5c400cd..0ee2ba589492 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -182,8 +182,8 @@ static __always_inline void clflush(volatile void *__p) static inline void clflushopt(volatile void *__p) { - alternative_io(".byte 0x3e; clflush %P0", - ".byte 0x66; clflush %P0", + alternative_io(".byte 0x3e; clflush %0", + ".byte 0x66; clflush %0", X86_FEATURE_CLFLUSHOPT, "+m" (*(volatile char __force *)__p)); } From 41cd2e1ee96e56401a18dbce6f42f0bdaebcbf3b Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 19 Mar 2024 11:40:13 +0100 Subject: [PATCH 2/6] x86/asm: Use %c/%n instead of %P operand modifier in asm templates The "P" asm operand modifier is a x86 target-specific modifier. When used with a constant, the "P" modifier emits "cst" instead of "$cst". This property is currently used to emit the bare constant without all syntax-specific prefixes. The generic "c" resp. "n" operand modifier should be used instead. No functional changes intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Ard Biesheuvel Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20240319104418.284519-3-ubizjak@gmail.com --- arch/x86/boot/main.c | 4 ++-- arch/x86/include/asm/alternative.h | 22 +++++++++++----------- arch/x86/include/asm/atomic64_32.h | 2 +- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/irq_stack.h | 2 +- arch/x86/include/asm/uaccess.h | 4 ++-- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index c4ea5258ab55..9049f390d834 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -119,8 +119,8 @@ static void init_heap(void) char *stack_end; if (boot_params.hdr.loadflags & CAN_USE_HEAP) { - asm("leal %P1(%%esp),%0" - : "=r" (stack_end) : "i" (-STACK_SIZE)); + asm("leal %n1(%%esp),%0" + : "=r" (stack_end) : "i" (STACK_SIZE)); heap_end = (char *) ((size_t)boot_params.hdr.heap_end_ptr + 0x200); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index fcd20c6dc7f9..99432696b181 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -294,10 +294,10 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, newinstr1 is used. * Otherwise, oldinstr is used. */ -#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ - ft_flags2, input...) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ - newinstr2, ft_flags2) \ +#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ + ft_flags2, input...) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ + newinstr2, ft_flags2) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ @@ -307,7 +307,7 @@ static inline int alternatives_text_reserved(void *start, void *end) /* Like alternative_io, but for replacing a direct call with another one. */ #define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ - asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \ + asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) /* @@ -316,12 +316,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, function1 is used. * Otherwise, old function is used. */ -#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ - output, input...) \ - asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\ - "call %P[new2]", ft_flags2) \ - : output, ASM_CALL_CONSTRAINT \ - : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ +#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ + output, input...) \ + asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ + "call %c[new2]", ft_flags2) \ + : output, ASM_CALL_CONSTRAINT \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input) /* diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 3486d91b8595..d510405e4e1d 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -24,7 +24,7 @@ typedef struct { #ifdef CONFIG_X86_CMPXCHG64 #define __alternative_atomic64(f, g, out, in...) \ - asm volatile("call %P[func]" \ + asm volatile("call %c[func]" \ : out : [func] "i" (atomic64_##g##_cx8), ## in) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a1273698fc43..fa938ed96506 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -169,7 +169,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); static __always_inline bool _static_cpu_has(u16 bit) { asm goto( - ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") + ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 798183867d78..b71ad173f877 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -100,7 +100,7 @@ } #define ASM_CALL_ARG0 \ - "call %P[__func] \n" \ + "call %c[__func] \n" \ ASM_REACHABLE #define ASM_CALL_ARG1 \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 237dc8cdd12b..0f9bab92a43d 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -78,7 +78,7 @@ extern int __get_user_bad(void); int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ - asm volatile("call __" #fn "_%P4" \ + asm volatile("call __" #fn "_%c4" \ : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ @@ -177,7 +177,7 @@ extern void __put_user_nocheck_8(void); __chk_user_ptr(__ptr); \ __ptr_pu = __ptr; \ __val_pu = __x; \ - asm volatile("call __" #fn "_%P[size]" \ + asm volatile("call __" #fn "_%c[size]" \ : "=c" (__ret_pu), \ ASM_CALL_CONSTRAINT \ : "0" (__ptr_pu), \ From d689863c1a60b9936b47a34fa5c3330de374f4fc Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 19 Mar 2024 11:40:14 +0100 Subject: [PATCH 3/6] x86/asm: Use %a instead of %P operand modifier in asm templates The "P" asm operand modifier is a x86 target-specific modifier. For x86_64, when used with a symbol reference, the "%P" modifier emits "sym" instead of "sym(%rip)". This property is currently used to issue bare symbol reference. The generic "a" operand modifier should be used instead. The "a" asm operand modifier substitutes a memory reference, with the actual operand treated as address. For x86_64, when a symbol is provided, the "a" modifier emits "sym(%rip)" instead of "sym", enabling shorter %rip-relative addressing. Also note that unlike GCC, clang emits %rip-relative symbol reference with "P" asm operand modifier, so the patch also unifies symbol handling with both compilers. No functional changes intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Ard Biesheuvel Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20240319104418.284519-4-ubizjak@gmail.com --- arch/x86/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index fa938ed96506..daae5c6e7d0e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -172,7 +172,7 @@ static __always_inline bool _static_cpu_has(u16 bit) ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" - " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" + " testb %[bitnum], %a[cap_byte]\n" " jnz %l[t_yes]\n" " jmp %l[t_no]\n" ".popsection\n" From 648337147d3550c1ca3d1b500e66dbda12e2d836 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 19 Mar 2024 18:16:46 +0100 Subject: [PATCH 4/6] x86/asm: Use "m" operand constraint in WRUSSQ asm template The WRUSSQ instruction uses a memory operand, so use "m" operand constraint instead of forcing usage of pointer register using "r" constraint. The generated assembly code improves from: 6ece3: 48 8d 43 f8 lea -0x8(%rbx),%rax ... 6eceb: 66 48 0f 38 f5 18 wrussq %rbx,(%rax) to: 6ecea: 66 48 0f 38 f5 43 f8 wrussq %rax,-0x8(%rbx) Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: Ard Biesheuvel Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20240319171714.76342-1-ubizjak@gmail.com --- arch/x86/include/asm/special_insns.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 0ee2ba589492..aec6e2d3aa1d 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -205,9 +205,9 @@ static inline void clwb(volatile void *__p) #ifdef CONFIG_X86_USER_SHADOW_STACK static inline int write_user_shstk_64(u64 __user *addr, u64 val) { - asm goto("1: wrussq %[val], (%[addr])\n" + asm goto("1: wrussq %[val], %[addr]\n" _ASM_EXTABLE(1b, %l[fail]) - :: [addr] "r" (addr), [val] "r" (val) + :: [addr] "m" (*addr), [val] "r" (val) :: fail); return 0; fail: From 4c9a93800121e90484cd07c8e5bde70e31cdb996 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 14 Mar 2024 19:57:15 +0300 Subject: [PATCH 5/6] x86/asm/64: Clean up memset16(), memset32(), memset64() assembly constraints in - Use "+" constraint modifier, simplify inputs and output lists, delete dummy variables with meaningless names, "&" only makes sense in complex assembly creating constraints on intermediate registers. But 1 instruction assemblies don't have inner body so to speak. - Write "rep stos*" on one line: Rep prefix is integral part of x86 instruction. I'm not sure why people separate "rep" with newline. Uros Bizjak adds context: "some archaic assemblers rejected 'rep insn' on one line. I have checked that the minimum required binutils-2.25 assembles this without problems." - Use __auto_type for maximum copy pasta experience, - Reformat a bit to make everything looks nicer. Note that "memory" clobber is too much if "n" is known at compile time. However, "=m" (*(T(*)[n])s) doesn't work because -Wvla even if "n" is compile time constant: if (BCP(n)) { rep stos : "=m" (*(T(*)[n])s) } else { rep stosw : "memory" } The above doesn't work. Signed-off-by: Alexey Dobriyan Signed-off-by: Ingo Molnar Reviewed-by: Uros Bizjak Cc: Linus Torvalds Cc: Andy Lutomirski Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20240314165715.31831-1-adobriyan@gmail.com --- arch/x86/include/asm/string_64.h | 45 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 857d364b9888..9d0b324eab21 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -30,37 +30,40 @@ void *__memset(void *s, int c, size_t n); #define __HAVE_ARCH_MEMSET16 static inline void *memset16(uint16_t *s, uint16_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosw" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const __auto_type s0 = s; + asm volatile ( + "rep stosw" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET32 static inline void *memset32(uint32_t *s, uint32_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosl" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const __auto_type s0 = s; + asm volatile ( + "rep stosl" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #define __HAVE_ARCH_MEMSET64 static inline void *memset64(uint64_t *s, uint64_t v, size_t n) { - long d0, d1; - asm volatile("rep\n\t" - "stosq" - : "=&c" (d0), "=&D" (d1) - : "a" (v), "1" (s), "0" (n) - : "memory"); - return s; + const __auto_type s0 = s; + asm volatile ( + "rep stosq" + : "+D" (s), "+c" (n) + : "a" (v) + : "memory" + ); + return s0; } #endif From a0c8cf9780359376496bbd6d2be1343badf68af7 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 4 Apr 2024 12:04:25 +0200 Subject: [PATCH 6/6] x86/alternatives: Remove a superfluous newline in _static_cpu_has() No functional changes. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240327154317.29909-2-bp@alien8.de --- arch/x86/include/asm/cpufeature.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index daae5c6e7d0e..cd90ceff6d2f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -168,8 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm goto( - ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") + asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" " testb %[bitnum], %a[cap_byte]\n"