From e1df128dc00beaa53b0be4e751b7f2f0192dc146 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 12 Feb 2026 22:24:04 +0100 Subject: [PATCH 01/13] KVM: x86: Zero-initialize temporary fxregs_state buffers in FXSAVE emulation Explicitly zero-initialize stack-allocated struct fxregs_state variables in em_fxsave() and fxregs_fixup() to ensure all padding and unused fields are cleared before use. Both functions declare temporary fxregs_state buffers that may be partially written by fxsave. Although the emulator copies only the architecturally defined portion of the state to userspace, any padding or otherwise untouched bytes in the structure can remain uninitialized. This can lead to the use of uninitialized stack data and may trigger KMSAN reports. In the worst case, it could result in leaking stack contents if such bytes are ever exposed. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Uros Bizjak Cc: Sean Christopherson Cc: Paolo Bonzini Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. 
Peter Anvin" Link: https://patch.msgid.link/20260212212457.24483-1-ubizjak@gmail.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c8e292e9a24d..20ed588015f1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3708,7 +3708,7 @@ static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt) */ static int em_fxsave(struct x86_emulate_ctxt *ctxt) { - struct fxregs_state fx_state; + struct fxregs_state fx_state = {}; int rc; rc = check_fxsr(ctxt); @@ -3738,7 +3738,7 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) static noinline int fxregs_fixup(struct fxregs_state *fx_state, const size_t used_size) { - struct fxregs_state fx_tmp; + struct fxregs_state fx_tmp = {}; int rc; rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp)); From c522ac04ba9d7ec6003633aa1501c7392cdf8b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20L=C3=B3pez?= Date: Thu, 12 Feb 2026 15:05:56 +0100 Subject: [PATCH 02/13] KVM: x86/pmu: annotate struct kvm_x86_pmu_event_filter with __counted_by() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct kvm_x86_pmu_event_filter has a flexible array member, so annotate it with the field that describes the amount of entries in such array. Opportunistically replace the open-coded array size calculation with flex_array_size() when copying the array portion of the struct from userspace. 
Signed-off-by: Carlos López Link: https://patch.msgid.link/20260212140556.3883030-2-clopez@suse.de Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/pmu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ff07c45e3c73..d9159b969bd9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1261,7 +1261,7 @@ struct kvm_x86_pmu_event_filter { __u32 nr_excludes; __u64 *includes; __u64 *excludes; - __u64 events[]; + __u64 events[] __counted_by(nevents); }; enum kvm_apicv_inhibit { diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index bd6b785cf261..e218352e3423 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -1256,7 +1256,7 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) r = -EFAULT; if (copy_from_user(filter->events, user_filter->events, - sizeof(filter->events[0]) * filter->nevents)) + flex_array_size(filter, events, filter->nevents))) goto cleanup; r = prepare_filter_lists(filter); From 46ee9d718b9b67a8be067a39e21da6634107ed0e Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 10 Feb 2026 01:21:43 -0500 Subject: [PATCH 03/13] KVM: Mark halt poll and other module parameters with appropriate memory attributes Add '__read_mostly' to the halt polling parameters (halt_poll_ns, halt_poll_ns_grow, halt_poll_ns_grow_start, halt_poll_ns_shrink) since they are frequently read in hot paths (e.g., vCPU halt handling) but only occasionally updated via sysfs. This improves cache locality on SMP systems. Conversely, mark 'allow_unsafe_mappings' and 'enable_virt_at_load' with '__ro_after_init', as they are set only during module initialization via kernel command line or early sysfs writes and remain constant thereafter. This enhances security by preventing runtime modification and enables compiler optimizations. 
Signed-off-by: Li RongQing Link: https://patch.msgid.link/20260210062143.1739-1-lirongqing@baidu.com Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1bc1da66b4b0..66371d8139d8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -76,22 +76,22 @@ MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor"); MODULE_LICENSE("GPL"); /* Architectures should define their poll value according to the halt latency */ -unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; +unsigned int __read_mostly halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; module_param(halt_poll_ns, uint, 0644); EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns); /* Default doubles per-vcpu halt_poll_ns. */ -unsigned int halt_poll_ns_grow = 2; +unsigned int __read_mostly halt_poll_ns_grow = 2; module_param(halt_poll_ns_grow, uint, 0644); EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_grow); /* The start value to grow halt_poll_ns from */ -unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ +unsigned int __read_mostly halt_poll_ns_grow_start = 10000; /* 10us */ module_param(halt_poll_ns_grow_start, uint, 0644); EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_grow_start); /* Default halves per-vcpu halt_poll_ns. */ -unsigned int halt_poll_ns_shrink = 2; +unsigned int __read_mostly halt_poll_ns_shrink = 2; module_param(halt_poll_ns_shrink, uint, 0644); EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_shrink); @@ -99,7 +99,7 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_shrink); * Allow direct access (from KVM or the CPU) without MMU notifier protection * to unpinned pages. 
*/ -static bool allow_unsafe_mappings; +static bool __ro_after_init allow_unsafe_mappings; module_param(allow_unsafe_mappings, bool, 0444); /* @@ -5574,7 +5574,7 @@ static struct miscdevice kvm_dev = { }; #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -bool enable_virt_at_load = true; +bool __ro_after_init enable_virt_at_load = true; module_param(enable_virt_at_load, bool, 0444); EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_virt_at_load); From 5a6b189317501169b0510f2f1256cfc0c6ca81c7 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 2 Feb 2026 04:50:04 -0500 Subject: [PATCH 04/13] KVM: SVM: Mark module parameters as __ro_after_init for security and performance SVM module parameters such as avic, sev_enabled, npt_enabled, and pause_filter_thresh are configured exclusively during initialization (via kernel command line) and remain constant throughout runtime. Additionally, sev_supported_vmsa_features and svm_gp_erratum_intercept, while not exposed as module parameters, share the same initialization pattern and runtime constancy. Mark these variables with '__ro_after_init' to: - Harden against accidental or malicious runtime modification - Enable compiler and CPU optimizations (improved caching, branch prediction) - Align with kernel security best practices for init-only configuration The exception is 'iopm_base', which retains '__read_mostly' as it requires updates during module unloading. 
Suggested-by: Sean Christopherson Signed-off-by: Li RongQing Link: https://patch.msgid.link/20260202095004.1765-1-lirongqing@baidu.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/avic.c | 4 ++-- arch/x86/kvm/svm/sev.c | 8 ++++---- arch/x86/kvm/svm/svm.c | 32 ++++++++++++++++---------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index f92214b1a938..8c2bc98fed2b 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -86,13 +86,13 @@ static const struct kernel_param_ops avic_ops = { * Enable / disable AVIC. In "auto" mode (default behavior), AVIC is enabled * for Zen4+ CPUs with x2AVIC (and all other criteria for enablement are met). */ -static int avic = AVIC_AUTO_MODE; +static int __ro_after_init avic = AVIC_AUTO_MODE; module_param_cb(avic, &avic_ops, &avic, 0444); __MODULE_PARM_TYPE(avic, "bool"); module_param(enable_ipiv, bool, 0444); -static bool force_avic; +static bool __ro_after_init force_avic; module_param_unsafe(force_avic, bool, 0444); /* Note: diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 3f9c1aa39a0a..77ebc166abfd 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -52,18 +52,18 @@ #define SNP_GUEST_VMM_ERR_GENERIC (~0U) /* enable/disable SEV support */ -static bool sev_enabled = true; +static bool __ro_after_init sev_enabled = true; module_param_named(sev, sev_enabled, bool, 0444); /* enable/disable SEV-ES support */ -static bool sev_es_enabled = true; +static bool __ro_after_init sev_es_enabled = true; module_param_named(sev_es, sev_es_enabled, bool, 0444); /* enable/disable SEV-SNP support */ -static bool sev_snp_enabled = true; +static bool __ro_after_init sev_snp_enabled = true; module_param_named(sev_snp, sev_snp_enabled, bool, 0444); -static unsigned int nr_ciphertext_hiding_asids; +static unsigned int __ro_after_init nr_ciphertext_hiding_asids; module_param_named(ciphertext_hiding_asids, 
nr_ciphertext_hiding_asids, uint, 0444); #define AP_RESET_HOLD_NONE 0 diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8f8bc863e214..936f7652d1e4 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -110,52 +110,52 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); * count only mode. */ -static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; +static unsigned short __ro_after_init pause_filter_thresh = KVM_DEFAULT_PLE_GAP; module_param(pause_filter_thresh, ushort, 0444); -static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; +static unsigned short __ro_after_init pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; module_param(pause_filter_count, ushort, 0444); /* Default doubles per-vcpu window every exit. */ -static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; +static unsigned short __ro_after_init pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; module_param(pause_filter_count_grow, ushort, 0444); /* Default resets per-vcpu window every exit to pause_filter_count. */ -static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; +static unsigned short __ro_after_init pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; module_param(pause_filter_count_shrink, ushort, 0444); /* Default is to compute the maximum so we can never overflow. */ -static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; +static unsigned short __ro_after_init pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; module_param(pause_filter_count_max, ushort, 0444); /* * Use nested page tables by default. Note, NPT may get forced off by * svm_hardware_setup() if it's unsupported by hardware or the host kernel. 
*/ -bool npt_enabled = true; +bool __ro_after_init npt_enabled = true; module_param_named(npt, npt_enabled, bool, 0444); /* allow nested virtualization in KVM/SVM */ -static int nested = true; +static int __ro_after_init nested = true; module_param(nested, int, 0444); /* enable/disable Next RIP Save */ -int nrips = true; +int __ro_after_init nrips = true; module_param(nrips, int, 0444); /* enable/disable Virtual VMLOAD VMSAVE */ -static int vls = true; +static int __ro_after_init vls = true; module_param(vls, int, 0444); /* enable/disable Virtual GIF */ -int vgif = true; +int __ro_after_init vgif = true; module_param(vgif, int, 0444); /* enable/disable LBR virtualization */ -int lbrv = true; +int __ro_after_init lbrv = true; module_param(lbrv, int, 0444); -static int tsc_scaling = true; +static int __ro_after_init tsc_scaling = true; module_param(tsc_scaling, int, 0444); module_param(enable_device_posted_irqs, bool, 0444); @@ -164,19 +164,19 @@ bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); -bool intercept_smi = true; +bool __ro_after_init intercept_smi = true; module_param(intercept_smi, bool, 0444); -bool vnmi = true; +bool __ro_after_init vnmi = true; module_param(vnmi, bool, 0444); module_param(enable_mediated_pmu, bool, 0444); -static bool svm_gp_erratum_intercept = true; +static bool __ro_after_init svm_gp_erratum_intercept = true; static u8 rsm_ins_bytes[] = "\x0f\xaa"; -static unsigned long iopm_base; +static unsigned long __read_mostly iopm_base; DEFINE_PER_CPU(struct svm_cpu_data, svm_data); From e907b4e72488f1df878e7e8acf88d23e49cb3ca7 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 27 Feb 2026 01:13:06 +0000 Subject: [PATCH 05/13] KVM: x86: Check for injected exceptions before queuing a debug exception On KVM_SET_GUEST_DEBUG, if a #DB or #BP is injected with KVM_GUESTDBG_INJECT_DB or KVM_GUESTDBG_INJECT_BP, KVM fails with -EBUSY if there is an existing pending exception. 
This was introduced in commit 4f926bf29186 ("KVM: x86: Polish exception injection via KVM_SET_GUEST_DEBUG") to avoid a warning in kvm_queue_exception(), presumably to avoid overriding a pending exception. This added another (arguably nice) property: if there's a pending exception, KVM_SET_GUEST_DEBUG cannot cause a #DF or triple fault. However, if an exception is injected, KVM_SET_GUEST_DEBUG will cause a #DF or triple fault in the guest, as kvm_multiple_exception() combines them. Check for both pending and injected exceptions for KVM_GUESTDBG_INJECT_DB and KVM_GUESTDBG_INJECT_BP, to avoid accidentally injecting a #DF or triple fault. Signed-off-by: Yosry Ahmed base-commit: a68a4bbc5b9ce5b722473399f05cb05217abaee8 Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a03530795707..658476815b6a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12529,7 +12529,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { r = -EBUSY; - if (kvm_is_exception_pending(vcpu)) + if (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected) goto out; if (dbg->control & KVM_GUESTDBG_INJECT_DB) kvm_queue_exception(vcpu, DB_VECTOR); From 690dc03859e7907bc995f389618c748619559477 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 10 Feb 2026 15:45:42 -0800 Subject: [PATCH 06/13] KVM: x86: Ignore cpuid faulting in SMM The Intel Virtualization Technology FlexMigration Application Note says, "When CPUID faulting is enabled, all executions of the CPUID instruction outside system-management mode (SMM) cause a general-protection exception (#GP(0)) if the current privilege level (CPL) is greater than 0." Always allow the execution of CPUID in SMM. 
Fixes: db2336a80489 ("KVM: x86: virtualize cpuid faulting") Signed-off-by: Jim Mattson Link: https://patch.msgid.link/20260210234613.1383279-1-jmattson@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 3 ++- arch/x86/kvm/emulate.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d2486506a808..baf9a2860d98 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -2157,7 +2157,8 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 eax, ebx, ecx, edx; - if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0)) + if (!is_smm(vcpu) && cpuid_fault_enabled(vcpu) && + !kvm_require_cpl(vcpu, 0)) return 1; eax = kvm_rax_read(vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 20ed588015f1..500711c6f069 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3583,10 +3583,10 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) u64 msr = 0; ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr); - if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT && - ctxt->ops->cpl(ctxt)) { + if (!ctxt->ops->is_smm(ctxt) && + (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT) && + ctxt->ops->cpl(ctxt)) return emulate_gp(ctxt, 0); - } eax = reg_read(ctxt, VCPU_REGS_RAX); ecx = reg_read(ctxt, VCPU_REGS_RCX); From 43e41846ac7ebee529c3684b5726d71224f4fbdd Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Mon, 2 Mar 2026 15:42:49 +0000 Subject: [PATCH 07/13] KVM: x86: Drop redundant call to kvm_deliver_exception_payload() In kvm_check_and_inject_events(), kvm_deliver_exception_payload() is called for pending #DB exceptions. However, shortly after, the per-vendor inject_exception callbacks are made. Both vmx_inject_exception() and svm_inject_exception() unconditionally call kvm_deliver_exception_payload(), so the call in kvm_check_and_inject_events() is redundant. 
Note that the extra call for pending #DB exceptions is harmless, as kvm_deliver_exception_payload() clears exception.has_payload after the first call. The call in kvm_check_and_inject_events() was added in commit f10c729ff965 ("kvm: vmx: Defer setting of DR6 until #DB delivery"). At that point, the call was likely needed because svm_queue_exception() checked whether an exception for L2 is intercepted by L1 before calling kvm_deliver_exception_payload(), as SVM did not have a check_nested_events callback. Since DR6 is updated before the #DB intercept in SVM (unlike VMX), it was necessary to deliver the DR6 payload before calling svm_queue_exception(). After that, commit 7c86663b68ba ("KVM: nSVM: inject exceptions via svm_check_nested_events") added a check_nested_events callback for SVM, which checked for L1 intercepts for L2's exceptions, and delivered the payload appropriately before the intercept. At that point, svm_queue_exception() started calling kvm_deliver_exception_payload() unconditionally, and the call to kvm_deliver_exception_payload() from its caller became redundant. No functional change intended. 
Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260302154249.784529-1-yosry@kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 658476815b6a..d5731499f4c2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10736,12 +10736,10 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); - if (vcpu->arch.exception.vector == DB_VECTOR) { - kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception); - if (vcpu->arch.dr7 & DR7_GD) { - vcpu->arch.dr7 &= ~DR7_GD; - kvm_update_dr7(vcpu); - } + if (vcpu->arch.exception.vector == DB_VECTOR && + vcpu->arch.dr7 & DR7_GD) { + vcpu->arch.dr7 &= ~DR7_GD; + kvm_update_dr7(vcpu); } kvm_inject_exception(vcpu); From 3b27c82ba2f3dcf8075e3df74dbf7294d2955d1a Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Sat, 7 Mar 2026 01:16:17 +0000 Subject: [PATCH 08/13] KVM: x86: Move some EFER bits enablement to common code Move EFER bits enablement that only depend on CPU support to common code, as there is no reason to do it in vendor code. Leave EFER.SVME and EFER.LMSLE enablement in SVM code as they depend on vendor module parameters. Having the enablement in common code ensures that if a vendor starts supporting an existing feature, KVM doesn't end up advertising to userspace but not allowing the EFER bit to be set. No functional change intended. 
Suggested-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260307011619.2324234-2-yosry@kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 7 ------- arch/x86/kvm/vmx/vmx.c | 4 ---- arch/x86/kvm/x86.c | 14 ++++++++++++++ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 936f7652d1e4..424ed50e6bfa 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5405,14 +5405,10 @@ static __init int svm_hardware_setup(void) pr_err_ratelimited("NX (Execute Disable) not supported\n"); return -EOPNOTSUPP; } - kvm_enable_efer_bits(EFER_NX); kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); - if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) - kvm_enable_efer_bits(EFER_FFXSR); - if (tsc_scaling) { if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { tsc_scaling = false; @@ -5426,9 +5422,6 @@ static __init int svm_hardware_setup(void) tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); - if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) - kvm_enable_efer_bits(EFER_AUTOIBRS); - /* Check for pause filtering support */ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { pause_filter_count = 0; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 967b58a8ab9d..bc28da49f283 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8694,10 +8694,6 @@ __init int vmx_hardware_setup(void) vmx_setup_user_return_msrs(); - - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - if (boot_cpu_has(X86_FEATURE_MPX)) { rdmsrq(MSR_IA32_BNDCFGS, host_bndcfgs); WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost"); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d5731499f4c2..7e8c1816cffd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9998,6 +9998,18 @@ void kvm_setup_xss_caps(void) } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps); +static void kvm_setup_efer_caps(void) +{ + if 
(boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); + + if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) + kvm_enable_efer_bits(EFER_FFXSR); + + if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) + kvm_enable_efer_bits(EFER_AUTOIBRS); +} + static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) { memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); @@ -10134,6 +10146,8 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) if (r != 0) goto out_mmu_exit; + kvm_setup_efer_caps(); + enable_device_posted_irqs &= enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); From d216449f253c7039c3e6a0276279c117a5198ce0 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Sat, 7 Mar 2026 01:16:18 +0000 Subject: [PATCH 09/13] KVM: x86: Use kvm_cpu_cap_has() for EFER bits enablement checks Instead of checking that the hardware supports underlying features for EFER bits, check if KVM supports them. It is practically the same, but this removes a subtle dependency on kvm_set_cpu_caps() enabling the relevant CPUID features. No functional change intended. 
Suggested-by: Sean Christopherson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20260307011619.2324234-3-yosry@kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7e8c1816cffd..3753d0b62ded 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10000,13 +10000,13 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps); static void kvm_setup_efer_caps(void) { - if (boot_cpu_has(X86_FEATURE_NX)) + if (kvm_cpu_cap_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); - if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) + if (kvm_cpu_cap_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); - if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) + if (kvm_cpu_cap_has(X86_FEATURE_AUTOIBRS)) kvm_enable_efer_bits(EFER_AUTOIBRS); } From 26c9bfc0fac240540581cfbe58031b412f98aaf8 Mon Sep 17 00:00:00 2001 From: xuanqingshi <1356292400@qq.com> Date: Fri, 6 Mar 2026 17:12:32 +0800 Subject: [PATCH 10/13] KVM: x86: Add LAPIC guard in kvm_apic_write_nodecode() kvm_apic_write_nodecode() dereferences vcpu->arch.apic without first checking whether the in-kernel LAPIC has been initialized. If it has not (e.g. the vCPU was created without an in-kernel LAPIC), the dereference results in a NULL pointer access. While APIC-write VM-Exits are not expected to occur on a vCPU without an in-kernel LAPIC, kvm_apic_write_nodecode() should be robust against such a scenario as a defense-in-depth measure, e.g. to guard against KVM bugs or CPU errata that could generate a spurious APIC-write VM-Exit. Use KVM_BUG_ON() with lapic_in_kernel() instead of a simple WARN_ON_ONCE(), as suggested by Sean Christopherson, so that KVM kills the VM outright rather than letting it continue in a broken state. Found by a VMCS-targeted fuzzer based on syzkaller. 
Signed-off-by: xuanqingshi <1356292400@qq.com> Link: https://patch.msgid.link/tencent_7A9F1B4D75468C0CF5DE1B6902038C948B07@qq.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9381c58d4c85..02f2039d5f99 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2657,6 +2657,9 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { struct kvm_lapic *apic = vcpu->arch.apic; + if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm)) + return; + /* * ICR is a single 64-bit register when x2APIC is enabled, all others * registers hold 32-bit values. For legacy xAPIC, ICR writes need to From 00d572d4cd7d23f9a7a498d2d824b68ba3ea5b88 Mon Sep 17 00:00:00 2001 From: Anel Orazgaliyeva Date: Fri, 6 Mar 2026 08:59:52 +0100 Subject: [PATCH 11/13] KVM: X86: Fix array_index_nospec protection in __pv_send_ipi The __pv_send_ipi() function iterates over up to BITS_PER_LONG vCPUs starting from the APIC ID specified in its 'min' argument, which is provided by the guest. Commit c87bd4dd43a6 used array_index_nospec() to clamp the value of 'min' but then the for_each_set_bit() loop dereferences higher indices without further protection. Theoretically, a guest can trigger speculative access to up to BITS_PER_LONG elements off the end of the phys_map[] array. (In practice it would probably need aggressive loop unrolling by the compiler to go more than one element off the end, and even that seems unlikely, but the theoretical possibility exists.) Move the array_index_nospec() inside the loop to protect the [map + i] index which is actually being used each time. 
Fixes: c87bd4dd43a6 ("KVM: x86: use array_index_nospec with indices that come from guest") Fixes: bdf7ffc89922 ("KVM: LAPIC: Fix pv ipis out-of-bounds access") Fixes: 4180bf1b655a ("KVM: X86: Implement "send IPI" hypercall") Signed-off-by: Anel Orazgaliyeva Signed-off-by: David Woodhouse Reviewed-by: Jim Mattson Link: https://patch.msgid.link/9d50fc3ca9e8e58f551d015f95d51a3c29ce6ccc.camel@infradead.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02f2039d5f99..e3ec4d8607c1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -840,16 +840,16 @@ static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, { int i, count = 0; struct kvm_vcpu *vcpu; + size_t map_index; if (min > map->max_apic_id) return 0; - min = array_index_nospec(min, map->max_apic_id + 1); - for_each_set_bit(i, ipi_bitmap, - min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { - if (map->phys_map[min + i]) { - vcpu = map->phys_map[min + i]->vcpu; + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { + map_index = array_index_nospec(min + i, map->max_apic_id + 1); + if (map->phys_map[map_index]) { + vcpu = map->phys_map[map_index]->vcpu; count += kvm_apic_set_irq(vcpu, irq, NULL); } } From 55be358e17af4aa218f173cd6eb17a0dc423cd70 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 Mar 2026 13:26:19 -0800 Subject: [PATCH 12/13] KVM: x86: Immediately fail the build when possible if required #define is missing Guard usage of the must-be-defined macros in KVM's multi-include headers with the existing #ifdefs that attempt to alert the developer to a missing macro, and spit out an explicit #error message if a macro is missing, as referencing the missing macro completely defeats the purpose of the #ifdef (the compiler spews a ton of error messages and buries the targeted error message). 
Suggested-by: Alexey Dobriyan Reviewed-by: Yuan Yao Link: https://patch.msgid.link/20260302212619.710873-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm-x86-ops.h | 10 ++++++---- arch/x86/include/asm/kvm-x86-pmu-ops.h | 8 +++++--- arch/x86/kvm/vmx/vmcs_shadow_fields.h | 5 +++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index de709fb5bd76..3776cf5382a2 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL) -BUILD_BUG_ON(1) -#endif - +#if !defined(KVM_X86_OP) || \ + !defined(KVM_X86_OP_OPTIONAL) || \ + !defined(KVM_X86_OP_OPTIONAL_RET0) +#error Missing one or more KVM_X86_OP #defines +#else /* * KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate * both DECLARE/DEFINE_STATIC_CALL() invocations and @@ -148,6 +149,7 @@ KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) KVM_X86_OP_OPTIONAL(gmem_invalidate) +#endif #undef KVM_X86_OP #undef KVM_X86_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h index f0aa6996811f..d5452b3433b7 100644 --- a/arch/x86/include/asm/kvm-x86-pmu-ops.h +++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL) -BUILD_BUG_ON(1) -#endif +#if !defined(KVM_X86_PMU_OP) || \ + !defined(KVM_X86_PMU_OP_OPTIONAL) +#error Missing one or more KVM_X86_PMU_OP #defines +#else /* * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate @@ -26,6 +27,7 @@ KVM_X86_PMU_OP_OPTIONAL(cleanup) KVM_X86_PMU_OP_OPTIONAL(write_global_ctrl) KVM_X86_PMU_OP(mediated_load) KVM_X86_PMU_OP(mediated_put) +#endif #undef KVM_X86_PMU_OP 
#undef KVM_X86_PMU_OP_OPTIONAL diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h index cad128d1657b..67e821c2be6d 100644 --- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h +++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h @@ -1,6 +1,6 @@ #if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW) -BUILD_BUG_ON(1) -#endif +#error Must #define at least one of SHADOW_FIELD_RO or SHADOW_FIELD_RW +#else #ifndef SHADOW_FIELD_RO #define SHADOW_FIELD_RO(x, y) @@ -74,6 +74,7 @@ SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base) /* 64-bit */ SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address) SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address) +#endif #undef SHADOW_FIELD_RO #undef SHADOW_FIELD_RW From de0bfdc7137d5132b71dd1fe7aa3ca3df4d68241 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Tue, 10 Feb 2026 05:35:11 +0000 Subject: [PATCH 13/13] KVM: x86: Advertise AVX512 Bit Matrix Multiply (BMM) to userspace Advertise AVX512 Bit Matrix Multiply (BMM) and Bit Reversal instructions to userspace via CPUID leaf 0x80000021_EAX[23]. This feature enables bit matrix multiply operations and bit reversal. Like most AVX instructions, there are no intercept controls for individual instructions, and no extra work is needed in KVM to enable correct execution of the instructions in the guest. The instructions and CPUID feature are first described in: AMD64 Bit Matrix Multiply and Bit Reversal Instructions Publication #69192 Revision: 1.00 Issue Date: January 2026 While at it, reorder PREFETCHI in KVM's initialization sequence to match the CPUID bit position order for better organization. 
Signed-off-by: Nikunj A Dadhania Link: https://patch.msgid.link/20260210053511.1612505-1-nikunj@amd.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kvm/cpuid.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dbe104df339b..de7bd88e539d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -473,6 +473,7 @@ #define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ +#define X86_FEATURE_AVX512_BMM (20*32+23) /* AVX512 Bit Matrix Multiply instructions */ #define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index baf9a2860d98..d740c45039c9 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1243,11 +1243,12 @@ void kvm_initialize_cpu_caps(void) F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), - F(PREFETCHI), EMULATED_F(NO_SMM_CTL_MSR), /* PrefetchCtlMsr */ /* GpOnUserCpuid */ /* EPSF */ + F(PREFETCHI), + F(AVX512_BMM), F(ERAPS), SYNTHESIZED_F(SBPB), SYNTHESIZED_F(IBPB_BRTYPE),