KVM: x86: Add CONFIG_KVM_IOAPIC to allow disabling in-kernel I/O APIC

Add a Kconfig to allow building KVM without support for emulating a I/O
APIC, PIC, and PIT, which is desirable for deployments that effectively
don't support a fully in-kernel IRQ chip, i.e. never expect any VMM to
create an in-kernel I/O APIC.  E.g. compiling out support eliminates a few
thousand lines of guest-facing code and gives security folks warm fuzzies.

As a bonus, wrapping relevant paths with CONFIG_KVM_IOAPIC #ifdefs makes
it much easier for readers to understand which bits and pieces exist
specifically for fully in-kernel IRQ chips.

Opportunistically convert all two in-kernel uses of __KVM_HAVE_IOAPIC to
CONFIG_KVM_IOAPIC, e.g. rather than add a second #ifdef to generate a stub
for kvm_arch_post_irq_routing_update().

Acked-by: Kai Huang <kai.huang@intel.com>
Link: https://lore.kernel.org/r/20250611213557.294358-15-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
Sean Christopherson 2025-06-11 14:35:53 -07:00
parent 2c938850d9
commit 628a27731e
12 changed files with 65 additions and 10 deletions

View File

@ -1375,9 +1375,11 @@ struct kvm_arch {
atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
atomic_t assigned_device_count;
#ifdef CONFIG_KVM_IOAPIC
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
#endif
atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock;
struct kvm_apic_map __rcu *apic_map;

View File

@ -166,6 +166,16 @@ config KVM_AMD_SEV
Encrypted State (SEV-ES), and Secure Encrypted Virtualization with
Secure Nested Paging (SEV-SNP) technologies on AMD processors.
config KVM_IOAPIC
bool "I/O APIC, PIC, and PIT emulation"
default y
depends on KVM
help
Provides support for KVM to emulate an I/O APIC, PIC, and PIT, i.e.
for full in-kernel APIC emulation.
If unsure, say Y.
config KVM_SMM
bool "System Management Mode emulation"
default y

View File

@ -5,12 +5,13 @@ ccflags-$(CONFIG_KVM_WERROR) += -Werror
include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
kvm-y += x86.o emulate.o irq.o lapic.o \
irq_comm.o cpuid.o pmu.o mtrr.o \
debugfs.o mmu/mmu.o mmu/page_track.o \
mmu/spte.o
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_IOAPIC) += i8259.o i8254.o ioapic.o
kvm-$(CONFIG_KVM_HYPERV) += hyperv.o
kvm-$(CONFIG_KVM_XEN) += xen.o
kvm-$(CONFIG_KVM_SMM) += smm.o

View File

@ -8,6 +8,7 @@
#include <uapi/asm/kvm.h>
#ifdef CONFIG_KVM_IOAPIC
struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */
u16 latched_count;
@ -64,5 +65,6 @@ int kvm_vm_ioctl_reinject(struct kvm *kvm, struct kvm_reinject_control *control)
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);
#endif /* CONFIG_KVM_IOAPIC */
#endif

View File

@ -76,8 +76,10 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v)
if (!kvm_apic_accept_pic_intr(v))
return 0;
#ifdef CONFIG_KVM_IOAPIC
if (pic_in_kernel(v->kvm))
return v->kvm->arch.vpic->output;
#endif
WARN_ON_ONCE(!irqchip_split(v->kvm));
return pending_userspace_extint(v);
@ -136,8 +138,10 @@ int kvm_cpu_get_extint(struct kvm_vcpu *v)
return v->kvm->arch.xen.upcall_vector;
#endif
#ifdef CONFIG_KVM_IOAPIC
if (pic_in_kernel(v->kvm))
return kvm_pic_read_irq(v->kvm); /* PIC */
#endif
WARN_ON_ONCE(!irqchip_split(v->kvm));
return get_userspace_extint(v);
@ -171,7 +175,9 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
#ifdef CONFIG_KVM_IOAPIC
__kvm_migrate_pit_timer(vcpu);
#endif
kvm_x86_call(migrate_timers)(vcpu);
}
@ -187,6 +193,7 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
return irqchip_in_kernel(kvm);
}
#ifdef CONFIG_KVM_IOAPIC
#define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
.u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
@ -273,3 +280,4 @@ int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
kvm_pic_update_irq(pic);
return r;
}
#endif

View File

@ -18,6 +18,8 @@
#include <kvm/iodev.h>
#include "lapic.h"
#ifdef CONFIG_KVM_IOAPIC
#define PIC_NUM_PINS 16
#define SELECT_PIC(irq) \
((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
@ -79,12 +81,19 @@ static inline int irqchip_full(struct kvm *kvm)
smp_rmb();
return mode == KVM_IRQCHIP_KERNEL;
}
#else /* CONFIG_KVM_IOAPIC */
static __always_inline int irqchip_full(struct kvm *kvm)
{
return false;
}
#endif
static inline int pic_in_kernel(struct kvm *kvm)
{
return irqchip_full(kvm);
}
static inline int irqchip_split(struct kvm *kvm)
{
int mode = kvm->arch.irqchip_mode;

View File

@ -208,6 +208,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
* check kvm_arch_can_set_irq_routing() before calling this function.
*/
switch (ue->type) {
#ifdef CONFIG_KVM_IOAPIC
case KVM_IRQ_ROUTING_IRQCHIP:
if (irqchip_split(kvm))
return -EINVAL;
@ -231,6 +232,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
break;
#endif
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
e->msi.address_lo = ue->u.msi.address_lo;

View File

@ -1455,7 +1455,7 @@ static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
int trigger_mode;
int __maybe_unused trigger_mode;
/* Eoi the ioapic only if the ioapic doesn't own the vector. */
if (!kvm_ioapic_handles_vector(apic, vector))
@ -1476,12 +1476,14 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
return;
}
#ifdef CONFIG_KVM_IOAPIC
if (apic_test_vector(vector, apic->regs + APIC_TMR))
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
#endif
}
static int apic_set_eoi(struct kvm_lapic *apic)
@ -3146,8 +3148,11 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
#ifdef CONFIG_KVM_IOAPIC
if (ioapic_in_kernel(vcpu->kvm))
kvm_rtc_eoi_tracking_restore_one(vcpu);
#endif
vcpu->arch.apic_arb_prio = 0;

View File

@ -270,6 +270,7 @@ TRACE_EVENT(kvm_cpuid,
{0x6, "SIPI"}, \
{0x7, "ExtINT"}
#ifdef CONFIG_KVM_IOAPIC
TRACE_EVENT(kvm_ioapic_set_irq,
TP_PROTO(__u64 e, int pin, bool coalesced),
TP_ARGS(e, pin, coalesced),
@ -314,6 +315,7 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
(__entry->e & (1<<15)) ? "level" : "edge",
(__entry->e & (1<<16)) ? "|masked" : "")
);
#endif
TRACE_EVENT(kvm_msi_set_irq,
TP_PROTO(__u64 address, __u64 data),

View File

@ -4632,17 +4632,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_EXT_CPUID:
case KVM_CAP_EXT_EMUL_CPUID:
case KVM_CAP_CLOCKSOURCE:
#ifdef CONFIG_KVM_IOAPIC
case KVM_CAP_PIT:
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
case KVM_CAP_REINJECT_CONTROL:
#endif
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_IOEVENTFD_NO_LENGTH:
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_VCPU_EVENTS:
#ifdef CONFIG_KVM_HYPERV
@ -6937,9 +6940,11 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
int r = -ENOTTY;
#ifdef CONFIG_KVM_IOAPIC
/*
* This union makes it completely explicit to gcc-3.x
* that these two variables' stack usage should be
* that these three variables' stack usage should be
* combined, not added together.
*/
union {
@ -6947,6 +6952,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
struct kvm_pit_state2 ps2;
struct kvm_pit_config pit_config;
} u;
#endif
switch (ioctl) {
case KVM_SET_TSS_ADDR:
@ -6970,6 +6976,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
case KVM_SET_NR_MMU_PAGES:
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
break;
#ifdef CONFIG_KVM_IOAPIC
case KVM_CREATE_IRQCHIP: {
mutex_lock(&kvm->lock);
@ -7136,6 +7143,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
r = kvm_vm_ioctl_reinject(kvm, &control);
break;
}
#endif
case KVM_SET_BOOT_CPU_ID:
r = 0;
mutex_lock(&kvm->lock);
@ -10595,8 +10603,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
#ifdef CONFIG_KVM_IOAPIC
else if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
#endif
if (is_guest_mode(vcpu))
vcpu->arch.load_eoi_exitmap_pending = true;
@ -12799,7 +12809,9 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
#ifdef CONFIG_KVM_IOAPIC
kvm_free_pit(kvm);
#endif
kvm_mmu_pre_destroy_vm(kvm);
static_call_cond(kvm_x86_vm_pre_destroy)(kvm);
@ -12823,8 +12835,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
}
kvm_destroy_vcpus(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
#ifdef CONFIG_KVM_IOAPIC
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
#endif
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);

View File

@ -1023,7 +1023,7 @@ void kvm_unlock_all_vcpus(struct kvm *kvm);
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_IOAPIC
#ifdef CONFIG_KVM_IOAPIC
void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
#else
static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)

View File

@ -84,14 +84,14 @@ TRACE_EVENT(kvm_set_irq,
);
#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
#if defined(__KVM_HAVE_IOAPIC)
#ifdef CONFIG_KVM_IOAPIC
#define kvm_irqchips \
{KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \
{KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \
{KVM_IRQCHIP_IOAPIC, "IOAPIC"}
#endif /* defined(__KVM_HAVE_IOAPIC) */
#endif /* CONFIG_KVM_IOAPIC */
#if defined(CONFIG_HAVE_KVM_IRQCHIP)