mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 09:04:39 +02:00
LoongArch: Align FPU register state to 32 bytes
Move fpr to the beginning of struct loongarch_fpu so it is naturally aligned to FPU_ALIGN (32 bytes), improving 256-bit SIMD (LASX) context switch performance. Also adjust process.c and fpu.S to work well with the new loongarch_fpu layout.

Signed-off-by: Lisa Robinson <lisa@bytefly.space>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
This commit is contained in:
parent
1829419bc3
commit
e3f4591f79
|
|
@ -80,10 +80,10 @@ BUILD_FPR_ACCESS(32)
|
|||
BUILD_FPR_ACCESS(64)
|
||||
|
||||
struct loongarch_fpu {
|
||||
union fpureg fpr[NUM_FPU_REGS];
|
||||
uint64_t fcc; /* 8x8 */
|
||||
uint32_t fcsr;
|
||||
uint32_t ftop;
|
||||
union fpureg fpr[NUM_FPU_REGS];
|
||||
};
|
||||
|
||||
struct loongarch_lbt {
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@
|
|||
.endm
|
||||
|
||||
#ifdef CONFIG_32BIT
|
||||
.macro sc_save_fcc thread tmp0 tmp1
|
||||
.macro sc_save_fcc base tmp0 tmp1
|
||||
movcf2gr \tmp0, $fcc0
|
||||
move \tmp1, \tmp0
|
||||
movcf2gr \tmp0, $fcc1
|
||||
|
|
@ -106,7 +106,7 @@
|
|||
bstrins.w \tmp1, \tmp0, 23, 16
|
||||
movcf2gr \tmp0, $fcc3
|
||||
bstrins.w \tmp1, \tmp0, 31, 24
|
||||
EX st.w \tmp1, \thread, THREAD_FCC
|
||||
EX st.w \tmp1, \base, 0
|
||||
movcf2gr \tmp0, $fcc4
|
||||
move \tmp1, \tmp0
|
||||
movcf2gr \tmp0, $fcc5
|
||||
|
|
@ -115,11 +115,11 @@
|
|||
bstrins.w \tmp1, \tmp0, 23, 16
|
||||
movcf2gr \tmp0, $fcc7
|
||||
bstrins.w \tmp1, \tmp0, 31, 24
|
||||
EX st.w \tmp1, \thread, (THREAD_FCC + 4)
|
||||
EX st.w \tmp1, \base, 4
|
||||
.endm
|
||||
|
||||
.macro sc_restore_fcc thread tmp0 tmp1
|
||||
EX ld.w \tmp0, \thread, THREAD_FCC
|
||||
.macro sc_restore_fcc base tmp0 tmp1
|
||||
EX ld.w \tmp0, \base, 0
|
||||
bstrpick.w \tmp1, \tmp0, 7, 0
|
||||
movgr2cf $fcc0, \tmp1
|
||||
bstrpick.w \tmp1, \tmp0, 15, 8
|
||||
|
|
@ -128,7 +128,7 @@
|
|||
movgr2cf $fcc2, \tmp1
|
||||
bstrpick.w \tmp1, \tmp0, 31, 24
|
||||
movgr2cf $fcc3, \tmp1
|
||||
EX ld.w \tmp0, \thread, (THREAD_FCC + 4)
|
||||
EX ld.w \tmp0, \base, 4
|
||||
bstrpick.w \tmp1, \tmp0, 7, 0
|
||||
movgr2cf $fcc4, \tmp1
|
||||
bstrpick.w \tmp1, \tmp0, 15, 8
|
||||
|
|
|
|||
|
|
@ -135,6 +135,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
|||
return 0;
|
||||
}
|
||||
|
||||
dst->thread.fpu.fcsr = src->thread.fpu.fcsr;
|
||||
|
||||
if (!used_math())
|
||||
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
|
||||
else
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user