mirror of
https://github.com/torvalds/linux.git
synced 2026-05-23 22:52:19 +02:00
x86/fpu: Simplify the switch_fpu_prepare() + switch_fpu_finish() logic
Now that switch_fpu_finish() doesn't load the FPU state, it makes more sense to fold it into switch_fpu_prepare() renamed to switch_fpu(), and more importantly, use the "prev_p" task as a target for TIF_NEED_FPU_LOAD. It doesn't make any sense to delay set_tsk_thread_flag(TIF_NEED_FPU_LOAD) until "prev_p" is scheduled again.

There is no worry about the very first context switch, fpu_clone() must always set TIF_NEED_FPU_LOAD.

Also, shift the test_tsk_thread_flag(TIF_NEED_FPU_LOAD) from the callers to switch_fpu().

Note that the "PF_KTHREAD | PF_USER_WORKER" check can be removed but this deserves a separate patch which can change more functions, say, kernel_fpu_begin_mask().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Chang S. Bae <chang.seok.bae@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250503143830.GA8982@redhat.com
This commit is contained in:
parent
a78701fe4b
commit
730faa15a0
|
|
@ -18,31 +18,25 @@ extern void fpu_flush_thread(void);
|
|||
/*
|
||||
* FPU state switching for scheduling.
|
||||
*
|
||||
* This is a two-stage process:
|
||||
* switch_fpu() saves the old state and sets TIF_NEED_FPU_LOAD if
|
||||
* TIF_NEED_FPU_LOAD is not set. This is done within the context
|
||||
* of the old process.
|
||||
*
|
||||
* - switch_fpu_prepare() saves the old state.
|
||||
* This is done within the context of the old process.
|
||||
*
|
||||
* - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
|
||||
* will get loaded on return to userspace, or when the kernel needs it.
|
||||
*
|
||||
* If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
|
||||
* are saved in the current thread's FPU register state.
|
||||
*
|
||||
* If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
|
||||
* hold current()'s FPU registers. It is required to load the
|
||||
* Once TIF_NEED_FPU_LOAD is set, it is required to load the
|
||||
* registers before returning to userland or using the content
|
||||
* otherwise.
|
||||
*
|
||||
* The FPU context is only stored/restored for a user task and
|
||||
* PF_KTHREAD is used to distinguish between kernel and user threads.
|
||||
*/
|
||||
static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
|
||||
static inline void switch_fpu(struct task_struct *old, int cpu)
|
||||
{
|
||||
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
|
||||
if (!test_tsk_thread_flag(old, TIF_NEED_FPU_LOAD) &&
|
||||
cpu_feature_enabled(X86_FEATURE_FPU) &&
|
||||
!(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
|
||||
struct fpu *old_fpu = x86_task_fpu(old);
|
||||
|
||||
set_tsk_thread_flag(old, TIF_NEED_FPU_LOAD);
|
||||
save_fpregs_to_fpstate(old_fpu);
|
||||
/*
|
||||
* The save operation preserved register state, so the
|
||||
|
|
@ -50,7 +44,7 @@ static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
|
|||
* current CPU number in @old_fpu, so the next return
|
||||
* to user space can avoid the FPU register restore
|
||||
* when it returns on the same CPU and still owns the
|
||||
* context.
|
||||
* context. See fpregs_restore_userregs().
|
||||
*/
|
||||
old_fpu->last_cpu = cpu;
|
||||
|
||||
|
|
@ -58,14 +52,4 @@ static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Delay loading of the complete FPU state until the return to userland.
|
||||
* PKRU is handled separately.
|
||||
*/
|
||||
static inline void switch_fpu_finish(struct task_struct *new)
|
||||
{
|
||||
if (cpu_feature_enabled(X86_FEATURE_FPU))
|
||||
set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_FPU_SCHED_H */
|
||||
|
|
|
|||
|
|
@ -160,8 +160,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
|
||||
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
|
||||
|
||||
if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
|
||||
switch_fpu_prepare(prev_p, cpu);
|
||||
switch_fpu(prev_p, cpu);
|
||||
|
||||
/*
|
||||
* Save away %gs. No need to save %fs, as it was saved on the
|
||||
|
|
@ -208,8 +207,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
|
||||
raw_cpu_write(current_task, next_p);
|
||||
|
||||
switch_fpu_finish(next_p);
|
||||
|
||||
/* Load the Intel cache allocation PQR MSR. */
|
||||
resctrl_sched_in(next_p);
|
||||
|
||||
|
|
|
|||
|
|
@ -616,8 +616,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
|
||||
this_cpu_read(hardirq_stack_inuse));
|
||||
|
||||
if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
|
||||
switch_fpu_prepare(prev_p, cpu);
|
||||
switch_fpu(prev_p, cpu);
|
||||
|
||||
/* We must save %fs and %gs before load_TLS() because
|
||||
* %fs and %gs may be cleared by load_TLS().
|
||||
|
|
@ -671,8 +670,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|||
raw_cpu_write(current_task, next_p);
|
||||
raw_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
|
||||
|
||||
switch_fpu_finish(next_p);
|
||||
|
||||
/* Reload sp0. */
|
||||
update_task_stack(next_p);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user