From 6cce3609a1e0dedeef9b4bfdc87d0d4692f691d7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 13 Jan 2026 20:43:57 +0100 Subject: [PATCH 1/4] s390/preempt: Optimize preempt_count() Provide an inline assembly using alternatives to avoid the need of a base register when reading preempt_count() from lowcore. Use the LLGT instruction, which reads only the least significant 31 bits of preempt_count. This masks out the encoded PREEMPT_NEED_RESCHED bit. Generated code is changed from 000000000046e5d0 : 46e5d0: c0 04 00 00 00 00 jgnop 46e5d0 46e5d6: a7 39 00 00 lghi %r3,0 46e5da: 58 10 33 a8 l %r1,936(%r3) 46e5de: c0 1b 00 ff ff 00 nilf %r1,16776960 46e5e4: a7 74 00 11 jne 46e606 to something like this: 000000000046e5d0 : 46e5d0: c0 04 00 00 00 00 jgnop 46e5d0 46e5d6: e3 10 03 a8 00 17 llgt %r1,936 46e5dc: ec 41 28 b7 00 55 risbgz %r4,%r1,40,55 46e5e2: a7 74 00 0f jne 46e600 Overall savings are only 82 bytes according to bloat-o-meter. This is because of different inlining decisions, and there aren't many preempt_count() users in the kernel. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/preempt.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 6ccd033acfe5..6d6a28bee4e7 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,7 +8,10 @@ #include #include -/* We use the MSB mostly because its available */ +/* + * Use MSB so it is possible to read preempt_count with LLGT which + * reads the least significant 31 bits with a single instruction. 
+ */ #define PREEMPT_NEED_RESCHED 0x80000000 /* @@ -23,7 +26,20 @@ */ static __always_inline int preempt_count(void) { - return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED; + unsigned long lc_preempt, count; + + BUILD_BUG_ON(sizeof_field(struct lowcore, preempt_count) != sizeof(int)); + lc_preempt = offsetof(struct lowcore, preempt_count); + /* READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED */ + asm_inline( + ALTERNATIVE("llgt %[count],%[offzero](%%r0)\n", + "llgt %[count],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [count] "=d" (count) + : [offzero] "i" (lc_preempt), + [offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS), + "m" (((struct lowcore *)0)->preempt_count)); + return count; } static __always_inline void preempt_count_set(int pc) From 23ba7d31633da6b0706b4154e4eb74cdfab710ef Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 13 Jan 2026 20:43:58 +0100 Subject: [PATCH 2/4] s390/preempt: Optimize __preempt_count_add()/__preempt_count_sub() Provide an inline assembly using alternatives to avoid the need of a base register due to relocatable lowcore when adding or subtracting small constants from preempt_count. Main user is preempt_disable(), which adds one to preempt_count. With this the generated code changes from 10012c: a7 b9 00 00 lghi %r11,0 100130: eb 01 b3 a8 00 6a asi 936(%r11),1 to something like this: 10012c: eb 01 03 a8 00 6a asi 936,1 Kernel image size is reduced by 13kb (bloat-o-meter -t, defconfig, gcc15).
Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/preempt.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 6d6a28bee4e7..ef6ea3163c27 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -84,7 +84,17 @@ static __always_inline void __preempt_count_add(int val) */ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) { if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) { - __atomic_add_const(val, &get_lowcore()->preempt_count); + unsigned long lc_preempt; + + lc_preempt = offsetof(struct lowcore, preempt_count); + asm_inline( + ALTERNATIVE("asi %[offzero](%%r0),%[val]\n", + "asi %[offalt](%%r0),%[val]\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : "+m" (((struct lowcore *)0)->preempt_count) + : [offzero] "i" (lc_preempt), [val] "i" (val), + [offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS) + : "cc"); return; } } From 05405b8fd284189278636a0392976cbec3bb6d19 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 13 Jan 2026 20:43:59 +0100 Subject: [PATCH 3/4] s390/asm: Let __HAVE_ASM_FLAG_OUTPUTS__ define 1 With the empty define __is_defined(__HAVE_ASM_FLAG_OUTPUTS__) evaluates to false. Therefore let __HAVE_ASM_FLAG_OUTPUTS__ define 1 if it is defined. This allows __is_defined(__HAVE_ASM_FLAG_OUTPUTS__) to be used as expected.
Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h index e9062b01e2a2..510901c2a5f9 100644 --- a/arch/s390/include/asm/asm.h +++ b/arch/s390/include/asm/asm.h @@ -30,7 +30,7 @@ */ #if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN)) -#define __HAVE_ASM_FLAG_OUTPUTS__ +#define __HAVE_ASM_FLAG_OUTPUTS__ 1 #define CC_IPM(sym) #define CC_OUT(sym, var) "=@cc" (var) From 48b4790f054994d4df6d1025ec9267b19618f0ec Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 13 Jan 2026 20:44:00 +0100 Subject: [PATCH 4/4] s390/preempt: Optimize __preempt_count_dec_and_test() Provide an inline assembly using alternatives to avoid the need of a base register due to relocatable lowcore when adding or subtracting small constants from preempt_count. Main user is preempt_enable(), which subtracts one from preempt_count and tests if the result is zero. With this the generated code changes from 1000b8: a7 19 00 00 lghi %r1,0 1000bc: eb ff 13 a8 00 6e alsi 936(%r1),-1 1000c2: a7 54 00 05 jnhe 1000cc <__rcu_read_unlock+0x14> to something like this: 1000b8: eb ff 03 a8 00 6e alsi 936,-1 1000be: a7 54 00 05 jnhe 1000c8 <__rcu_read_unlock+0x10> Kernel image size is reduced by 45kb (bloat-o-meter -t, defconfig, gcc15). 
Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/preempt.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index ef6ea3163c27..6e5821bb047e 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -113,7 +113,22 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + unsigned long lc_preempt; + int cc; + + lc_preempt = offsetof(struct lowcore, preempt_count); + asm_inline( + ALTERNATIVE("alsi %[offzero](%%r0),%[val]\n", + "alsi %[offalt](%%r0),%[val]\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : "=@cc" (cc), "+m" (((struct lowcore *)0)->preempt_count) + : [offzero] "i" (lc_preempt), [val] "i" (-1), + [offalt] "i" (lc_preempt + LOWCORE_ALT_ADDRESS)); + return (cc == 0) || (cc == 2); +#else return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count); +#endif } /*