KVM: VMX: Bundle all L1 data cache flush mitigation code together

Move vmx_l1d_flush(), vmx_cleanup_l1d_flush(), and the vmentry_l1d_flush
param code up in vmx.c so that all of the L1 data cache flushing code is
bundled together.  This will allow conditioning the mitigation code on
CONFIG_CPU_MITIGATIONS=y with minimal #ifdefs.

No functional change intended.

Reviewed-by: Brendan Jackman <jackmanb@google.com>
Reviewed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Link: https://patch.msgid.link/20251113233746.1703361-8-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
Sean Christopherson 2025-11-13 15:37:44 -08:00
parent fc704b5789
commit 0abd9610d6

View File

@ -302,6 +302,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
return 0;
}
/*
 * Release the L1D flush buffer, if one is currently allocated, and reset the
 * recorded VMX L1TF mitigation mode to its AUTO value.
 */
static void vmx_cleanup_l1d_flush(void)
{
	unsigned long flush_buf = (unsigned long)vmx_l1d_flush_pages;

	if (flush_buf) {
		free_pages(flush_buf, L1D_CACHE_ORDER);
		vmx_l1d_flush_pages = NULL;
	}

	/* Restore state so sysfs ignores VMX */
	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
static int vmentry_l1d_flush_parse(const char *s)
{
unsigned int i;
@ -352,6 +362,83 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
/*
 * Flush the L1 data cache for @vcpu.
 *
 * When the vmx_l1d_flush_cond static key is enabled, the flush is performed
 * only if it was requested through the per-vCPU or the per-CPU l1tf_flush_l1d
 * flag; both flags are consumed (cleared) by the check.  Every flush that is
 * actually performed is counted in vcpu->stat.l1d_flush.
 *
 * If the CPU has X86_FEATURE_FLUSH_L1D, the flush is a single write of
 * L1D_FLUSH to MSR_IA32_FLUSH_CMD.  Otherwise the software sequence below
 * is used.
 *
 * Software based L1D cache flush which is used when microcode providing
 * the cache control MSR is not loaded.
 *
 * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
 * flush it is required to read in 64 KiB because the replacement algorithm
 * is not exactly LRU. This could be sized at runtime via topology
 * information but as all relevant affected CPUs have 32KiB L1D cache size
 * there is no point in doing so.
 */
static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{
/*
 * Number of bytes of vmx_l1d_flush_pages read by each pass of the
 * software flush (presumably the 64 KiB described above; verify against
 * the definition of L1D_CACHE_ORDER).
 */
int size = PAGE_SIZE << L1D_CACHE_ORDER;
/*
 * This code is only executed when the flush mode is 'cond' or
 * 'always'
 */
if (static_branch_likely(&vmx_l1d_flush_cond)) {
bool flush_l1d;
/*
 * Clear the per-vcpu flush bit, it gets set again if the vCPU
 * is reloaded, i.e. if the vCPU is scheduled out or if KVM
 * exits to userspace, or if KVM reaches one of the unsafe
 * VMEXIT handlers, e.g. if KVM calls into the emulator.
 */
flush_l1d = vcpu->arch.l1tf_flush_l1d;
vcpu->arch.l1tf_flush_l1d = false;
/*
 * Clear the per-cpu flush bit, it gets set again from
 * the interrupt handlers.
 */
flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
kvm_clear_cpu_l1tf_flush_l1d();
/* Neither flag requested a flush, so there is nothing to do. */
if (!flush_l1d)
return;
}
/* A flush will be performed from here on; account for it. */
vcpu->stat.l1d_flush++;
/* Use the flush command MSR when the CPU provides it. */
if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
native_wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
return;
}
asm volatile(
/*
 * First ensure the pages are in the TLB: read one byte every 4096
 * bytes, with EAX as the running offset, until EAX equals size.
 */
"xorl %%eax, %%eax\n"
".Lpopulate_tlb:\n\t"
"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
"addl $4096, %%eax\n\t"
"cmpl %%eax, %[size]\n\t"
"jne .Lpopulate_tlb\n\t"
/*
 * CPUID with EAX = 0.  NOTE(review): presumably used as a serializing
 * instruction so that the loads above have completed before the fill
 * pass starts; it writes EAX/EBX/ECX/EDX, which is why all four are
 * listed as clobbered.
 */
"xorl %%eax, %%eax\n\t"
"cpuid\n\t"
/*
 * Now fill the cache: read one byte every 64 bytes across the same
 * buffer, again until the offset in EAX equals size.
 */
"xorl %%eax, %%eax\n"
".Lfill_cache:\n"
"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
"addl $64, %%eax\n\t"
"cmpl %%eax, %[size]\n\t"
"jne .Lfill_cache\n\t"
"lfence\n"
/* No outputs: the loaded bytes land in ECX and are discarded. */
:: [flush_pages] "r" (vmx_l1d_flush_pages),
[size] "r" (size)
: "eax", "ebx", "ecx", "edx");
}
/*
 * Callbacks for the vmentry_l1d_flush module parameter: vmentry_l1d_flush_get()
 * is installed as the .get handler and vmentry_l1d_flush_set() as the .set
 * handler.
 */
static const struct kernel_param_ops vmentry_l1d_flush_ops = {
	.get	= vmentry_l1d_flush_get,
	.set	= vmentry_l1d_flush_set,
};

/* Register the parameter with mode 0644 and a NULL argument. */
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
{
u64 msr;
@ -404,12 +491,6 @@ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
vmx->disable_fb_clear = false;
}
/*
 * Callbacks for the vmentry_l1d_flush module parameter: vmentry_l1d_flush_get()
 * is installed as the .get handler and vmentry_l1d_flush_set() as the .set
 * handler.
 */
static const struct kernel_param_ops vmentry_l1d_flush_ops = {
	.get	= vmentry_l1d_flush_get,
	.set	= vmentry_l1d_flush_set,
};

/* Register the parameter with mode 0644 and a NULL argument. */
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
void vmx_vmexit(void);
@ -6646,77 +6727,6 @@ int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return ret;
}
/*
 * Flush the L1 data cache for @vcpu.
 *
 * When the vmx_l1d_flush_cond static key is enabled, the flush is performed
 * only if it was requested through the per-vCPU or the per-CPU l1tf_flush_l1d
 * flag; both flags are consumed (cleared) by the check.  Every flush that is
 * actually performed is counted in vcpu->stat.l1d_flush.
 *
 * If the CPU has X86_FEATURE_FLUSH_L1D, the flush is a single write of
 * L1D_FLUSH to MSR_IA32_FLUSH_CMD.  Otherwise the software sequence below
 * is used.
 *
 * Software based L1D cache flush which is used when microcode providing
 * the cache control MSR is not loaded.
 *
 * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
 * flush it is required to read in 64 KiB because the replacement algorithm
 * is not exactly LRU. This could be sized at runtime via topology
 * information but as all relevant affected CPUs have 32KiB L1D cache size
 * there is no point in doing so.
 */
static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{
/*
 * Number of bytes of vmx_l1d_flush_pages read by each pass of the
 * software flush (presumably the 64 KiB described above; verify against
 * the definition of L1D_CACHE_ORDER).
 */
int size = PAGE_SIZE << L1D_CACHE_ORDER;
/*
 * This code is only executed when the flush mode is 'cond' or
 * 'always'
 */
if (static_branch_likely(&vmx_l1d_flush_cond)) {
bool flush_l1d;
/*
 * Clear the per-vcpu flush bit, it gets set again if the vCPU
 * is reloaded, i.e. if the vCPU is scheduled out or if KVM
 * exits to userspace, or if KVM reaches one of the unsafe
 * VMEXIT handlers, e.g. if KVM calls into the emulator.
 */
flush_l1d = vcpu->arch.l1tf_flush_l1d;
vcpu->arch.l1tf_flush_l1d = false;
/*
 * Clear the per-cpu flush bit, it gets set again from
 * the interrupt handlers.
 */
flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
kvm_clear_cpu_l1tf_flush_l1d();
/* Neither flag requested a flush, so there is nothing to do. */
if (!flush_l1d)
return;
}
/* A flush will be performed from here on; account for it. */
vcpu->stat.l1d_flush++;
/* Use the flush command MSR when the CPU provides it. */
if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
native_wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
return;
}
asm volatile(
/*
 * First ensure the pages are in the TLB: read one byte every 4096
 * bytes, with EAX as the running offset, until EAX equals size.
 */
"xorl %%eax, %%eax\n"
".Lpopulate_tlb:\n\t"
"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
"addl $4096, %%eax\n\t"
"cmpl %%eax, %[size]\n\t"
"jne .Lpopulate_tlb\n\t"
/*
 * CPUID with EAX = 0.  NOTE(review): presumably used as a serializing
 * instruction so that the loads above have completed before the fill
 * pass starts; it writes EAX/EBX/ECX/EDX, which is why all four are
 * listed as clobbered.
 */
"xorl %%eax, %%eax\n\t"
"cpuid\n\t"
/*
 * Now fill the cache: read one byte every 64 bytes across the same
 * buffer, again until the offset in EAX equals size.
 */
"xorl %%eax, %%eax\n"
".Lfill_cache:\n"
"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
"addl $64, %%eax\n\t"
"cmpl %%eax, %[size]\n\t"
"jne .Lfill_cache\n\t"
"lfence\n"
/* No outputs: the loaded bytes land in ECX and are discarded. */
:: [flush_pages] "r" (vmx_l1d_flush_pages),
[size] "r" (size)
: "eax", "ebx", "ecx", "edx");
}
void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@ -8651,16 +8661,6 @@ __init int vmx_hardware_setup(void)
return r;
}
/*
 * Release the L1D flush buffer, if one is currently allocated, and reset the
 * recorded VMX L1TF mitigation mode to its AUTO value.
 */
static void vmx_cleanup_l1d_flush(void)
{
	unsigned long flush_buf = (unsigned long)vmx_l1d_flush_pages;

	if (flush_buf) {
		free_pages(flush_buf, L1D_CACHE_ORDER);
		vmx_l1d_flush_pages = NULL;
	}

	/* Restore state so sysfs ignores VMX */
	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
void vmx_exit(void)
{
allow_smaller_maxphyaddr = false;