x86/kvm/vmx: Move IRQ/NMI dispatch from KVM into x86 core

Move the VMX interrupt dispatch magic into the x86 core code. This
isolates KVM from the FRED/IDT decisions and reduces the amount of
EXPORT_SYMBOL_FOR_KVM().

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: "Verma, Vishal L" <vishal.l.verma@intel.com>
Tested-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Binbin Wu <binbin.wu@linxu.intel.com>
Acked-by: Sean Christopherson <seanjc@google.com>
Link: https://patch.msgid.link/20260508091829.GO3126523@noisy.programming.kicks-ass.net
This commit is contained in:
Peter Zijlstra 2026-05-08 11:18:29 +02:00 committed by Thomas Gleixner
parent b088fe3501
commit 0701c9e17b
12 changed files with 119 additions and 68 deletions

View File

@ -13,7 +13,7 @@ CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
CFLAGS_syscall_32.o += -fno-stack-protector
CFLAGS_syscall_64.o += -fno-stack-protector
obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o common.o
obj-y += vdso/
obj-y += vsyscall/

48
arch/x86/entry/common.c Normal file
View File

@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/entry-common.h>
#include <linux/kvm_types.h>
#include <asm/fred.h>
#include <asm/desc.h>
#if IS_ENABLED(CONFIG_KVM_INTEL)
/*
* On VMX, NMIs and IRQs (as configured by KVM) are acknowledged by hardware as
* part of the VM-Exit, i.e. the event itself is consumed as part the VM-Exit.
* x86_entry_from_kvm() is invoked by KVM to effectively forward NMIs and IRQs
* to the kernel for servicing. On SVM, a.k.a. AMD, the NMI/IRQ VM-Exit is
* purely a signal that an NMI/IRQ is pending, i.e. the event that triggered
* the VM-Exit is held pending until it's unblocked in the host.
*/
noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector)
{
if (event_type == EVENT_TYPE_EXTINT) {
#ifdef CONFIG_X86_64
/*
* Use FRED dispatch, even when running IDT. The dispatch
* tables are kept in sync between FRED and IDT, and the FRED
* dispatch works well with CFI.
*/
fred_entry_from_kvm(event_type, vector);
#else
idt_entry_from_kvm(vector);
#endif
return;
}
WARN_ON_ONCE(event_type != EVENT_TYPE_NMI);
#ifdef CONFIG_X86_64
if (cpu_feature_enabled(X86_FEATURE_FRED))
return fred_entry_from_kvm(event_type, vector);
#endif
/*
* Notably, we must use IDT dispatch for NMI when running in IDT mode.
* The FRED NMI context is significantly different and will not work
* right (specifically FRED fixed the NMI recursion issue).
*/
idt_entry_from_kvm(vector);
}
EXPORT_SYMBOL_FOR_KVM(x86_entry_from_kvm);
#endif

View File

@ -75,3 +75,49 @@ THUNK warn_thunk_thunk, __warn_thunk
#if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP)
EXPORT_SYMBOL(__ref_stack_chk_guard);
#endif
#if IS_ENABLED(CONFIG_KVM_INTEL)
.macro IDT_DO_EVENT_IRQOFF call_insn call_target
/*
* Unconditionally create a stack frame, getting the correct RSP on the
* stack (for x86-64) would take two instructions anyways, and RBP can
* be used to restore RSP to make objtool happy (see below).
*/
push %_ASM_BP
mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
/*
* Align RSP to a 16-byte boundary (to emulate CPU behavior) before
* creating the synthetic interrupt stack frame for the IRQ/NMI.
*/
and $-16, %rsp
push $__KERNEL_DS
push %rbp
#endif
pushf
push $__KERNEL_CS
\call_insn \call_target
/*
* "Restore" RSP from RBP, even though IRET has already unwound RSP to
* the correct value. objtool doesn't know the callee will IRET and,
* without the explicit restore, thinks the stack is getting walloped.
* Using an unwind hint is problematic due to x86-64's dynamic alignment.
*/
leave
RET
.endm
.pushsection .text, "ax"
SYM_FUNC_START(idt_do_interrupt_irqoff)
IDT_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(idt_do_interrupt_irqoff)
.popsection
.pushsection .noinstr.text, "ax"
SYM_FUNC_START(idt_do_nmi_irqoff)
IDT_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(idt_do_nmi_irqoff)
.popsection
#endif

View File

@ -147,5 +147,4 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
RET
SYM_FUNC_END(asm_fred_entry_from_kvm)
EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm);
#endif

View File

@ -438,6 +438,10 @@ extern void idt_setup_traps(void);
extern void idt_setup_apic_and_irq_gates(void);
extern bool idt_is_f00f_address(unsigned long address);
extern void idt_do_interrupt_irqoff(unsigned long address);
extern void idt_do_nmi_irqoff(void);
extern void idt_entry_from_kvm(unsigned int vector);
#ifdef CONFIG_X86_64
extern void idt_setup_early_pf(void);
#else

View File

@ -145,7 +145,7 @@ struct gate_struct {
typedef struct gate_struct gate_desc;
#ifndef _SETUP
static inline unsigned long gate_offset(const gate_desc *g)
static __always_inline unsigned long gate_offset(const gate_desc *g)
{
#ifdef CONFIG_X86_64
return g->offset_low | ((unsigned long)g->offset_middle << 16) |

View File

@ -97,4 +97,6 @@ static __always_inline void arch_exit_to_user_mode(void)
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
extern void x86_entry_from_kvm(unsigned int entry_type, unsigned int vector);
#endif

View File

@ -110,7 +110,6 @@ static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { ret
static inline void cpu_init_fred_exceptions(void) { }
static inline void cpu_init_fred_rsps(void) { }
static inline void fred_complete_exception_setup(void) { }
static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
static inline void fred_sync_rsp0(unsigned long rsp0) { }
static inline void fred_update_rsp0(void) { }
#endif /* CONFIG_X86_FRED */

View File

@ -268,6 +268,21 @@ void __init idt_setup_early_pf(void)
}
#endif
#if IS_ENABLED(CONFIG_KVM_INTEL)
noinstr void idt_entry_from_kvm(unsigned int vector)
{
if (vector == NMI_VECTOR)
return idt_do_nmi_irqoff();
/*
* Only the NMI path requires noinstr.
*/
instrumentation_begin();
idt_do_interrupt_irqoff(gate_offset(idt_table + vector));
instrumentation_end();
}
#endif
static void __init idt_map_in_cea(void)
{
/*

View File

@ -614,7 +614,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
{
exc_nmi(regs);
}
EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx);
#endif
#ifdef CONFIG_NMI_CHECK_CPU

View File

@ -31,38 +31,6 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
.macro VMX_DO_EVENT_IRQOFF call_insn call_target
/*
* Unconditionally create a stack frame, getting the correct RSP on the
* stack (for x86-64) would take two instructions anyways, and RBP can
* be used to restore RSP to make objtool happy (see below).
*/
push %_ASM_BP
mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
/*
* Align RSP to a 16-byte boundary (to emulate CPU behavior) before
* creating the synthetic interrupt stack frame for the IRQ/NMI.
*/
and $-16, %rsp
push $__KERNEL_DS
push %rbp
#endif
pushf
push $__KERNEL_CS
\call_insn \call_target
/*
* "Restore" RSP from RBP, even though IRET has already unwound RSP to
* the correct value. objtool doesn't know the callee will IRET and,
* without the explicit restore, thinks the stack is getting walloped.
* Using an unwind hint is problematic due to x86-64's dynamic alignment.
*/
leave
RET
.endm
.section .noinstr.text, "ax"
/**
@ -320,10 +288,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
SYM_FUNC_END(__vmx_vcpu_run)
SYM_FUNC_START(vmx_do_nmi_irqoff)
VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/**
@ -375,13 +339,3 @@ SYM_FUNC_START(vmread_error_trampoline)
RET
SYM_FUNC_END(vmread_error_trampoline)
#endif
.section .text, "ax"
#ifndef CONFIG_X86_FRED
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
#endif

View File

@ -7117,9 +7117,6 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}
void vmx_do_interrupt_irqoff(unsigned long entry);
void vmx_do_nmi_irqoff(void);
static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{
/*
@ -7161,17 +7158,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
/*
* Invoke the kernel's IRQ handler for the vector. Use the FRED path
* when it's available even if FRED isn't fully enabled, e.g. even if
* FRED isn't supported in hardware, in order to avoid the indirect
* CALL in the non-FRED path.
*/
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
if (IS_ENABLED(CONFIG_X86_FRED))
fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
else
vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
x86_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
kvm_after_interrupt(vcpu);
vcpu->arch.at_instruction_boundary = true;
@ -7481,10 +7469,7 @@ noinstr void vmx_handle_nmi(struct kvm_vcpu *vcpu)
return;
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
if (cpu_feature_enabled(X86_FEATURE_FRED))
fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
else
vmx_do_nmi_irqoff();
x86_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
kvm_after_interrupt(vcpu);
}