mirror of
https://github.com/torvalds/linux.git
synced 2026-05-26 16:12:59 +02:00
x86/syscall/64: Move 64-bit syscall dispatch code
Move the 64-bit syscall dispatch code to syscall_64.c.

No functional changes.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20250314151220.862768-4-brgerst@gmail.com
This commit is contained in:
parent
b634b02e2b
commit
01dfb48054
|
|
@ -9,9 +9,11 @@ KCOV_INSTRUMENT := n
|
|||
|
||||
CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
|
||||
CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE)
|
||||
CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
|
||||
|
||||
CFLAGS_common.o += -fno-stack-protector
|
||||
CFLAGS_syscall_32.o += -fno-stack-protector
|
||||
CFLAGS_syscall_64.o += -fno-stack-protector
|
||||
|
||||
obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
|
||||
obj-y += common.o
|
||||
|
|
|
|||
|
|
@ -32,99 +32,6 @@
|
|||
#include <asm/syscall.h>
|
||||
#include <asm/irq_stack.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/*
 * Dispatch a native 64-bit system call.
 *
 * When @nr is below NR_syscalls the result of x64_sys_call() is stored in
 * regs->ax and true is returned.  Otherwise @regs is left untouched and
 * false is returned so the caller can try something else.
 */
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
	/*
	 * Viewed as unsigned, every negative number becomes a very large
	 * one, so the single upper-bound test below rejects it as well.
	 */
	unsigned int idx = nr;

	if (!likely(idx < NR_syscalls))
		return false;

	/* Pass the index through array_index_nospec() before dispatching. */
	idx = array_index_nospec(idx, NR_syscalls);
	regs->ax = x64_sys_call(regs, idx);

	return true;
}
|
||||
|
||||
/*
 * Dispatch an x32 system call.
 *
 * Only does anything when CONFIG_X86_X32_ABI is enabled.  When @nr, rebased
 * by __X32_SYSCALL_BIT, is below X32_NR_syscalls the result of
 * x32_sys_call() is stored in regs->ax and true is returned.  In every
 * other case @regs is left untouched and false is returned.
 */
static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
{
	/*
	 * Rebase the number to the start of the x32 range.  Anything below
	 * __X32_SYSCALL_BIT wraps to a very large unsigned value and so
	 * fails the range test below.
	 */
	unsigned int idx = nr - __X32_SYSCALL_BIT;

	if (!IS_ENABLED(CONFIG_X86_X32_ABI))
		return false;

	if (!likely(idx < X32_NR_syscalls))
		return false;

	/* Pass the index through array_index_nospec() before dispatching. */
	idx = array_index_nospec(idx, X32_NR_syscalls);
	regs->ax = x32_sys_call(regs, idx);

	return true;
}
|
||||
|
||||
/* Returns true to return using SYSRET, or false to use IRET */
|
||||
__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
|
||||
{
|
||||
add_random_kstack_offset();
|
||||
nr = syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
instrumentation_begin();
|
||||
|
||||
if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
|
||||
/* Invalid system call, but still a system call. */
|
||||
regs->ax = __x64_sys_ni_syscall(regs);
|
||||
}
|
||||
|
||||
instrumentation_end();
|
||||
syscall_exit_to_user_mode(regs);
|
||||
|
||||
/*
|
||||
* Check that the register state is valid for using SYSRET to exit
|
||||
* to userspace. Otherwise use the slower but fully capable IRET
|
||||
* exit path.
|
||||
*/
|
||||
|
||||
/* XEN PV guests always use the IRET path */
|
||||
if (cpu_feature_enabled(X86_FEATURE_XENPV))
|
||||
return false;
|
||||
|
||||
/* SYSRET requires RCX == RIP and R11 == EFLAGS */
|
||||
if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
|
||||
return false;
|
||||
|
||||
/* CS and SS must match the values set in MSR_STAR */
|
||||
if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
|
||||
* in kernel space. This essentially lets the user take over
|
||||
* the kernel, since userspace controls RSP.
|
||||
*
|
||||
* TASK_SIZE_MAX covers all user-accessible addresses other than
|
||||
* the deprecated vsyscall page.
|
||||
*/
|
||||
if (unlikely(regs->ip >= TASK_SIZE_MAX))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* SYSRET cannot restore RF. It can restore TF, but unlike IRET,
|
||||
* restoring TF results in a trap from userspace immediately after
|
||||
* SYSRET.
|
||||
*/
|
||||
if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
|
||||
return false;
|
||||
|
||||
/* Use SYSRET to exit to userspace */
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
SYSCALL_DEFINE0(ni_syscall)
|
||||
{
|
||||
return -ENOSYS;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* System call table for x86-64. */
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* 64-bit system call dispatch */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/sys.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/entry-common.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
|
||||
|
|
@ -34,3 +36,93 @@ long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
|
|||
default: return __x64_sys_ni_syscall(regs);
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Dispatch a native 64-bit system call.
 *
 * When @nr is below NR_syscalls the result of x64_sys_call() is stored in
 * regs->ax and true is returned.  Otherwise @regs is left untouched and
 * false is returned so the caller can try something else.
 */
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
	/*
	 * Viewed as unsigned, every negative number becomes a very large
	 * one, so the single upper-bound test below rejects it as well.
	 */
	unsigned int idx = nr;

	if (!likely(idx < NR_syscalls))
		return false;

	/* Pass the index through array_index_nospec() before dispatching. */
	idx = array_index_nospec(idx, NR_syscalls);
	regs->ax = x64_sys_call(regs, idx);

	return true;
}
|
||||
|
||||
/*
 * Dispatch an x32 system call.
 *
 * Only does anything when CONFIG_X86_X32_ABI is enabled.  When @nr, rebased
 * by __X32_SYSCALL_BIT, is below X32_NR_syscalls the result of
 * x32_sys_call() is stored in regs->ax and true is returned.  In every
 * other case @regs is left untouched and false is returned.
 */
static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
{
	/*
	 * Rebase the number to the start of the x32 range.  Anything below
	 * __X32_SYSCALL_BIT wraps to a very large unsigned value and so
	 * fails the range test below.
	 */
	unsigned int idx = nr - __X32_SYSCALL_BIT;

	if (!IS_ENABLED(CONFIG_X86_X32_ABI))
		return false;

	if (!likely(idx < X32_NR_syscalls))
		return false;

	/* Pass the index through array_index_nospec() before dispatching. */
	idx = array_index_nospec(idx, X32_NR_syscalls);
	regs->ax = x32_sys_call(regs, idx);

	return true;
}
|
||||
|
||||
/* Returns true to return using SYSRET, or false to use IRET */
|
||||
__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
|
||||
{
|
||||
add_random_kstack_offset();
|
||||
nr = syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
instrumentation_begin();
|
||||
|
||||
if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
|
||||
/* Invalid system call, but still a system call. */
|
||||
regs->ax = __x64_sys_ni_syscall(regs);
|
||||
}
|
||||
|
||||
instrumentation_end();
|
||||
syscall_exit_to_user_mode(regs);
|
||||
|
||||
/*
|
||||
* Check that the register state is valid for using SYSRET to exit
|
||||
* to userspace. Otherwise use the slower but fully capable IRET
|
||||
* exit path.
|
||||
*/
|
||||
|
||||
/* XEN PV guests always use the IRET path */
|
||||
if (cpu_feature_enabled(X86_FEATURE_XENPV))
|
||||
return false;
|
||||
|
||||
/* SYSRET requires RCX == RIP and R11 == EFLAGS */
|
||||
if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
|
||||
return false;
|
||||
|
||||
/* CS and SS must match the values set in MSR_STAR */
|
||||
if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
|
||||
* in kernel space. This essentially lets the user take over
|
||||
* the kernel, since userspace controls RSP.
|
||||
*
|
||||
* TASK_SIZE_MAX covers all user-accessible addresses other than
|
||||
* the deprecated vsyscall page.
|
||||
*/
|
||||
if (unlikely(regs->ip >= TASK_SIZE_MAX))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* SYSRET cannot restore RF. It can restore TF, but unlike IRET,
|
||||
* restoring TF results in a trap from userspace immediately after
|
||||
* SYSRET.
|
||||
*/
|
||||
if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
|
||||
return false;
|
||||
|
||||
/* Use SYSRET to exit to userspace */
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user