KVM x86 changes for 6.3:

- Advertise support for Intel's fancy new fast REP string features
 
  - Fix a double-shootdown issue in the emergency reboot code
 
  - Ensure GIF=1 and disable SVM during an emergency reboot, i.e. give SVM
    similar treatment to VMX
 
  - Update Xen's TSC info CPUID sub-leaves as appropriate
 
  - Add support for Hyper-V's extended hypercalls, where "support" at this
    point is just forwarding the hypercalls to userspace
 
  - Clean up the kvm->lock vs. kvm->srcu sequences when updating the PMU and
    MSR filters
 
  - One-off fixes and cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCgAwFiEEMHr+pfEFOIzK+KY1YJEiAU0MEvkFAmPsDu4SHHNlYW5qY0Bn
 b29nbGUuY29tAAoJEGCRIgFNDBL5pn8P/3hp/sraIuTufi/mBxq35ATBpi437+T7
 ZxgzmwovouJ7sMIIgXi8rDKYe8z6FnI5J29jCIudI0+f8DltAzEUyjjHQPQtpGSX
 lMg4Z1ReHZ+mVPYrjklEyi3TqRFHIeqYSt9gwU7zGGXfPOWqN7SWpl8SMqUG6wUv
 XOk4OuPYnsgWsPE7HJhKJ8Lb4rR9NsuzzBHOQLGVatTlvgbXmYQSWzH1vQOlljVt
 Q86rMHN9Watq7f+K/TD7yyXSU70tKrfgTu9Hudd6rKdVgWTpmRWCLzopnU2K3Fc7
 w7SRmOedZbN47Y36FXm+Ui3Dz83mFsdYIN4Mv85IX+TXHCKk2mPHYoXs0jaZmt1q
 aAaJW2vGhap/6dlRII0Ohfs6/afqAy75/UFOa9i9z6KIlONvbs+ohJuMzm+CQX1b
 VJbnGpd7UCcEuIOFzf/gd4fNRxHzan6zSqh2+cMT2pUrEfigkZu0pVJbkVsf5eec
 7PMCjWVUwrdQP9p7v9IaEkMvLC1pyc6kMB7Fb5bthwiGKdQIIXd2J5mqY/Js6XZx
 oI5OJNBXEThpa3DnT2zKDNLLXmstkxiepGjOflrAiytDWmHyq7J1Usrjivb/EJHL
 EuONHXKG3dnASwMxFw+t7cXGd+Y/5HWI2ID5qf+K2ty2YczumsFQjB6qtxWiZbL4
 0JOzGHl3QQXl
 =xECB
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-misc-6.3' of https://github.com/kvm-x86/linux into HEAD

KVM x86 changes for 6.3:

 - Advertise support for Intel's fancy new fast REP string features

 - Fix a double-shootdown issue in the emergency reboot code

 - Ensure GIF=1 and disable SVM during an emergency reboot, i.e. give SVM
   similar treatment to VMX

 - Update Xen's TSC info CPUID sub-leaves as appropriate

 - Add support for Hyper-V's extended hypercalls, where "support" at this
   point is just forwarding the hypercalls to userspace

 - Clean up the kvm->lock vs. kvm->srcu sequences when updating the PMU and
   MSR filters

 - One-off fixes and cleanups
This commit is contained in:
Paolo Bonzini 2023-02-15 08:22:09 -05:00
commit 4f2a5a6b96
23 changed files with 327 additions and 109 deletions

View File

@ -312,6 +312,9 @@
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */

View File

@ -678,6 +678,11 @@ struct kvm_vcpu_hv {
} nested;
};
/*
 * Describes where one hypervisor's CPUID range (identified by its signature
 * string) sits in a vCPU's CPUID table.  A zero-initialized instance means
 * no entry with a matching signature was found.
 */
struct kvm_hypervisor_cpuid {
/* CPUID function whose EBX/ECX/EDX matched the signature; 0 if none. */
u32 base;
/* EAX of that signature entry; used as the upper bound for leaves in the range. */
u32 limit;
};
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
u64 hypercall_rip;
@ -698,6 +703,7 @@ struct kvm_vcpu_xen {
struct hrtimer timer;
int poll_evtchn;
struct timer_list poll_timer;
struct kvm_hypervisor_cpuid cpuid;
};
struct kvm_queued_exception {
@ -826,7 +832,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
u32 kvm_cpuid_base;
struct kvm_hypervisor_cpuid kvm_cpuid;
u64 reserved_gpa_bits;
int maxphyaddr;
@ -1327,7 +1333,6 @@ struct kvm_arch {
u32 bsp_vcpu_id;
u64 disabled_quirks;
int cpu_dirty_logging_count;
enum kvm_irqchip_mode irqchip_mode;
u8 nr_reserved_ioapic_pins;

View File

@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
void cpu_emergency_disable_virtualization(void);
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_panic_self_stop(struct pt_regs *regs);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);

View File

@ -126,7 +126,21 @@ static inline void cpu_svm_disable(void)
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
wrmsrl(MSR_EFER, efer & ~EFER_SVME);
if (efer & EFER_SVME) {
/*
* Force GIF=1 prior to disabling SVM to ensure INIT and NMI
* aren't blocked, e.g. if a fatal error occurred between CLGI
* and STGI. Note, STGI may #UD if SVM is disabled from NMI
* context between reading EFER and executing STGI. In that
* case, GIF must already be set, otherwise the NMI would have
* been blocked, so just eat the fault.
*/
asm_volatile_goto("1: stgi\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "memory" : fault);
fault:
wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
}
/** Makes sure SVM is disabled, if it is supported on the CPU

View File

@ -38,9 +38,11 @@ extern struct start_info *xen_start_info;
#include <asm/processor.h>
#define XEN_SIGNATURE "XenVMMXenVMM"
static inline uint32_t xen_cpuid_base(void)
{
return hypervisor_cpuid_base("XenVMMXenVMM", 2);
return hypervisor_cpuid_base(XEN_SIGNATURE, 2);
}
struct pci_dev;

View File

@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/stddef.h>
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
@ -507,8 +508,8 @@ struct kvm_nested_state {
* KVM_{GET,PUT}_NESTED_STATE ioctl values.
*/
union {
struct kvm_vmx_nested_state_data vmx[0];
struct kvm_svm_nested_state_data svm[0];
__DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx);
__DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm);
} data;
};

View File

@ -37,7 +37,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
/* Disable VMX or SVM if needed.
*
* We need to disable virtualization on all CPUs.
* Having VMX or SVM enabled on any CPU may break rebooting
* after the kdump kernel has finished its task.
*/
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
/*
* Disable Intel PT to stop its logging
*/
@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
/* Booting kdump kernel with VMX or SVM enabled won't work,
* because (among other limitations) we can't disable paging
* with the virt flags.
*/
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging

View File

@ -528,33 +528,29 @@ static inline void kb_wait(void)
}
}
static void vmxoff_nmi(int cpu, struct pt_regs *regs)
{
cpu_emergency_vmxoff();
}
static inline void nmi_shootdown_cpus_on_restart(void);
/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
static void emergency_vmx_disable_all(void)
static void emergency_reboot_disable_virtualization(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/*
* Disable VMX on all CPUs before rebooting, otherwise we risk hanging
* the machine, because the CPU blocks INIT when it's in VMX root.
* Disable virtualization on all CPUs before rebooting to avoid hanging
* the system, as VMX and SVM block INIT when running in the host.
*
* We can't take any locks and we may be in an inconsistent state, so
* use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
* use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
*
* Do the NMI shootdown even if VMX is off on _this_ CPU, as that
* doesn't prevent a different CPU from being in VMX root operation.
* Do the NMI shootdown even if virtualization is off on _this_ CPU, as
* other CPUs may have virtualization enabled.
*/
if (cpu_has_vmx()) {
/* Safely force _this_ CPU out of VMX root operation. */
__cpu_emergency_vmxoff();
if (cpu_has_vmx() || cpu_has_svm(NULL)) {
/* Safely force _this_ CPU out of VMX/SVM operation. */
cpu_emergency_disable_virtualization();
/* Halt and exit VMX root operation on the other CPUs. */
nmi_shootdown_cpus(vmxoff_nmi);
/* Disable VMX/SVM and halt on other CPUs. */
nmi_shootdown_cpus_on_restart();
}
}
@ -590,7 +586,7 @@ static void native_machine_emergency_restart(void)
unsigned short mode;
if (reboot_emergency)
emergency_vmx_disable_all();
emergency_reboot_disable_virtualization();
tboot_shutdown(TB_SHUTDOWN_REBOOT);
@ -795,6 +791,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
/*
 * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
 * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
 * GIF=0, i.e. if the crash occurred between CLGI and STGI.
 *
 * Takes no arguments and returns nothing.  Callers visible in this change run
 * it on the CPU initiating the crash/reboot and from the NMI and reboot-vector
 * handlers that stop the remaining CPUs.
 */
void cpu_emergency_disable_virtualization(void)
{
/*
 * Both helpers are invoked unconditionally.
 * NOTE(review): presumably each one is a no-op when its feature is not
 * active on this CPU - confirm against asm/virtext.h.
 */
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
}
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@ -817,7 +824,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
shootdown_callback(cpu, regs);
if (shootdown_callback)
shootdown_callback(cpu, regs);
/*
* Prepare the CPU for reboot _after_ invoking the callback so that the
* callback can safely use virtualization instructions, e.g. VMCLEAR.
*/
cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@ -828,18 +842,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
/*
* Halt all other CPUs, calling the specified function on each of them
/**
* nmi_shootdown_cpus - Stop other CPUs via NMI
* @callback: Optional callback to be invoked from the NMI handler
*
* This function can be used to halt all other CPUs on crash
* or emergency reboot time. The function passed as parameter
* will be called inside a NMI handler on all CPUs.
* The NMI handler on the remote CPUs invokes @callback, if not
* NULL, first and then disables virtualization to ensure that
* INIT is recognized during reboot.
*
* nmi_shootdown_cpus() can only be invoked once. After the first
* invocation all other CPUs are stuck in crash_nmi_callback() and
* cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
local_irq_disable();
/*
* Avoid certain doom if a shootdown already occurred; re-registering
* the NMI handler will cause list corruption, modifying the callback
* will do who knows what, etc...
*/
if (WARN_ON_ONCE(crash_ipi_issued))
return;
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@ -867,7 +895,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
/* Leave the nmi callback set */
/*
* Leave the nmi callback set, shootdown is a one-time thing. Clearing
* the callback could result in a NULL pointer dereference if a CPU
* (finally) responds after the timeout expires.
*/
}
/*
 * Stop the other CPUs ahead of an emergency restart, but only if no NMI
 * shootdown has been issued yet.  nmi_shootdown_cpus() may run just once, so
 * a restart that follows an earlier shootdown must not trigger a second one.
 * No callback is supplied (NULL is passed to nmi_shootdown_cpus()).
 */
static inline void nmi_shootdown_cpus_on_restart(void)
{
	/* A shootdown was already issued; nothing more to do. */
	if (crash_ipi_issued)
		return;

	nmi_shootdown_cpus(NULL);
}
/*
@ -897,6 +935,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
static inline void nmi_shootdown_cpus_on_restart(void) { }
void run_crash_ipi_callback(struct pt_regs *regs)
{
}

View File

@ -32,7 +32,7 @@
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
#include <asm/virtext.h>
#include <asm/reboot.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@ -122,7 +122,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
cpu_emergency_vmxoff();
cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
return NMI_HANDLED;
@ -134,7 +134,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
ack_APIC_irq();
cpu_emergency_vmxoff();
cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
}

View File

@ -26,6 +26,7 @@
#include "mmu.h"
#include "trace.h"
#include "pmu.h"
#include "xen.h"
/*
* Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
@ -181,15 +182,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
const char *sig)
{
u32 function;
struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
u32 base;
vcpu->arch.kvm_cpuid_base = 0;
for_each_possible_hypervisor_cpuid_base(function) {
entry = kvm_find_cpuid_entry(vcpu, function);
for_each_possible_hypervisor_cpuid_base(base) {
entry = kvm_find_cpuid_entry(vcpu, base);
if (entry) {
u32 signature[3];
@ -198,19 +199,21 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
signature[1] = entry->ecx;
signature[2] = entry->edx;
BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
vcpu->arch.kvm_cpuid_base = function;
if (!memcmp(signature, sig, sizeof(signature))) {
cpuid.base = base;
cpuid.limit = entry->eax;
break;
}
}
}
return cpuid;
}
static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry2 *entries, int nent)
{
u32 base = vcpu->arch.kvm_cpuid_base;
u32 base = vcpu->arch.kvm_cpuid.base;
if (!base)
return NULL;
@ -440,7 +443,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = nent;
kvm_update_kvm_cpuid_base(vcpu);
vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE);
vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE);
kvm_vcpu_after_set_cpuid(vcpu);
return 0;
@ -664,8 +668,9 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
kvm_cpu_cap_mask(CPUID_7_1_EAX,
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) |
F(AVX_IFMA)
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
F(FZRM) | F(FSRS) | F(FSRC) |
F(AMX_FP16) | F(AVX_IFMA)
);
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,

View File

@ -1634,7 +1634,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
case VCPU_SREG_SS:
/*
* segment is not a writable data segment or segment
* selector's RPL != CPL or segment selector's RPL != CPL
* selector's RPL != CPL or DPL != CPL
*/
if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
goto exception;
@ -1696,11 +1696,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
/*
* segment is not a data or readable code segment or
* ((segment is a data or nonconforming code segment)
* and (both RPL and CPL > DPL))
* and ((RPL > DPL) or (CPL > DPL)))
*/
if ((seg_desc.type & 0xa) == 0x8 ||
(((seg_desc.type & 0xc) != 0xc) &&
(rpl > dpl && cpl > dpl)))
(rpl > dpl || cpl > dpl)))
goto exception;
break;
}

View File

@ -44,6 +44,24 @@
#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK)
/*
* As per Hyper-V TLFS, extended hypercalls start from 0x8001
* (HvExtCallQueryCapabilities). The response of this hypercall is a 64-bit value
* where each bit tells which extended hypercall is available besides
* HvExtCallQueryCapabilities.
*
* 0x8001 - First extended hypercall, HvExtCallQueryCapabilities, no bit
* assigned.
*
* 0x8002 - Bit 0
* 0x8003 - Bit 1
* ..
* 0x8041 - Bit 63
*
* Therefore, HV_EXT_CALL_MAX = 0x8001 + 64
*/
#define HV_EXT_CALL_MAX (HV_EXT_CALL_QUERY_CAPABILITIES + 64)
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
bool vcpu_kick);
@ -2439,6 +2457,9 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
case HVCALL_SEND_IPI:
return hv_vcpu->cpuid_cache.enlightenments_eax &
HV_X64_CLUSTER_IPI_RECOMMENDED;
case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX:
return hv_vcpu->cpuid_cache.features_ebx &
HV_ENABLE_EXTENDED_HYPERCALLS;
default:
break;
}
@ -2531,14 +2552,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
vcpu->run->hyperv.u.hcall.input = hc.param;
vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
vcpu->arch.complete_userspace_io =
kvm_hv_hypercall_complete_userspace;
return 0;
goto hypercall_userspace_exit;
case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
if (unlikely(hc.var_cnt)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
@ -2597,15 +2611,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = HV_STATUS_OPERATION_DENIED;
break;
}
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
vcpu->run->hyperv.u.hcall.input = hc.param;
vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
vcpu->arch.complete_userspace_io =
kvm_hv_hypercall_complete_userspace;
return 0;
goto hypercall_userspace_exit;
}
case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX:
if (unlikely(hc.fast)) {
ret = HV_STATUS_INVALID_PARAMETER;
break;
}
goto hypercall_userspace_exit;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
@ -2613,6 +2626,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
hypercall_complete:
return kvm_hv_hypercall_complete(vcpu, ret);
hypercall_userspace_exit:
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
vcpu->run->hyperv.u.hcall.input = hc.param;
vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
vcpu->arch.complete_userspace_io = kvm_hv_hypercall_complete_userspace;
return 0;
}
void kvm_hv_init_vm(struct kvm *kvm)
@ -2756,6 +2778,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->ebx |= HV_POST_MESSAGES;
ent->ebx |= HV_SIGNAL_EVENTS;
ent->ebx |= HV_ENABLE_EXTENDED_HYPERCALLS;
ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;

View File

@ -634,6 +634,7 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
mutex_lock(&kvm->lock);
filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
mutex_is_locked(&kvm->lock));
mutex_unlock(&kvm->lock);
synchronize_srcu_expedited(&kvm->srcu);
BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
@ -644,8 +645,6 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
kvm_make_all_cpus_request(kvm, KVM_REQ_PMU);
mutex_unlock(&kvm->lock);
r = 0;
cleanup:
kfree(filter);

View File

@ -4606,7 +4606,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
* it needs to be set here when dirty logging is already active, e.g.
* if this vCPU was created after dirty logging was enabled.
*/
if (!vcpu->kvm->arch.cpu_dirty_logging_count)
if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
if (cpu_has_vmx_xsaves()) {
@ -7988,17 +7988,20 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (WARN_ON_ONCE(!enable_pml))
return;
if (is_guest_mode(vcpu)) {
vmx->nested.update_vmcs01_cpu_dirty_logging = true;
return;
}
/*
* Note, cpu_dirty_logging_count can be changed concurrent with this
* Note, nr_memslots_dirty_logging can be changed concurrent with this
* code, but in that case another update request will be made and so
* the guest will never run with a stale PML value.
*/
if (vcpu->kvm->arch.cpu_dirty_logging_count)
if (atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
else
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);

View File

@ -3161,6 +3161,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
&vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul);
vcpu->hw_tsc_khz = tgt_tsc_khz;
kvm_xen_update_tsc_info(v);
}
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
@ -4290,8 +4291,8 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
{
struct kvm_msrs msrs;
struct kvm_msr_entry *entries;
int r, n;
unsigned size;
int r;
r = -EFAULT;
if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
@ -4308,17 +4309,11 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
goto out;
}
r = n = __msr_io(vcpu, &msrs, entries, do_msr);
if (r < 0)
goto out_free;
r = __msr_io(vcpu, &msrs, entries, do_msr);
r = -EFAULT;
if (writeback && copy_to_user(user_msrs->entries, entries, size))
goto out_free;
r = -EFAULT;
r = n;
out_free:
kfree(entries);
out:
return r;
@ -6465,7 +6460,7 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
struct kvm_x86_msr_filter *new_filter, *old_filter;
bool default_allow;
bool empty = true;
int r = 0;
int r;
u32 i;
if (filter->flags & ~KVM_MSR_FILTER_VALID_MASK)
@ -6491,17 +6486,14 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
}
mutex_lock(&kvm->lock);
/* The per-VM filter is protected by kvm->lock... */
old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
old_filter = rcu_replace_pointer(kvm->arch.msr_filter, new_filter,
mutex_is_locked(&kvm->lock));
mutex_unlock(&kvm->lock);
synchronize_srcu(&kvm->srcu);
kvm_free_msr_filter(old_filter);
kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
mutex_unlock(&kvm->lock);
return 0;
}
@ -12273,7 +12265,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
*/
hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, 0);
if (IS_ERR((void *)hva))
if (IS_ERR_VALUE(hva))
return (void __user *)hva;
} else {
if (!slot || !slot->npages)
@ -12488,16 +12480,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
{
struct kvm_arch *ka = &kvm->arch;
int nr_slots;
if (!kvm_x86_ops.cpu_dirty_log_size)
return;
if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
(!enable && --ka->cpu_dirty_logging_count == 0))
nr_slots = atomic_read(&kvm->nr_memslots_dirty_logging);
if ((enable && nr_slots == 1) || !nr_slots)
kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
}
static void kvm_mmu_slot_apply_flags(struct kvm *kvm,

View File

@ -23,6 +23,9 @@
#include <xen/interface/event_channel.h>
#include <xen/interface/sched.h>
#include <asm/xen/cpuid.h>
#include "cpuid.h"
#include "trace.h"
static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm);
@ -2077,6 +2080,29 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
/*
 * Propagate the vCPU's current TSC parameters into the Xen CPUID leaf at
 * index 3 of the Xen range (the TSC info leaf):
 *
 *   sub-leaf 1: ECX = hv_clock.tsc_to_system_mul, EDX = hv_clock.tsc_shift
 *   sub-leaf 2: EAX = hw_tsc_khz
 *
 * Does nothing when the vCPU has no Xen CPUID range, when the leaf lies
 * beyond the range's advertised limit, or when a sub-leaf entry is absent.
 */
void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
{
	u32 xen_base = vcpu->arch.xen.cpuid.base;
	struct kvm_cpuid_entry2 *sub;
	u32 leaf;

	/* A base of zero means no Xen signature was found in CPUID. */
	if (!xen_base)
		return;

	leaf = xen_base | XEN_CPUID_LEAF(3);
	if (leaf > vcpu->arch.xen.cpuid.limit)
		return;

	sub = kvm_find_cpuid_entry_index(vcpu, leaf, 1);
	if (sub) {
		sub->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
		sub->edx = vcpu->arch.hv_clock.tsc_shift;
	}

	sub = kvm_find_cpuid_entry_index(vcpu, leaf, 2);
	if (sub)
		sub->eax = vcpu->arch.hw_tsc_khz;
}
void kvm_xen_init_vm(struct kvm *kvm)
{
mutex_init(&kvm->arch.xen.xen_lock);

View File

@ -9,6 +9,8 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
#include <asm/xen/hypervisor.h>
#ifdef CONFIG_KVM_XEN
#include <linux/jump_label_ratelimit.h>
@ -32,6 +34,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
@ -135,6 +138,10 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
{
return false;
}
static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
{
}
#endif
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);

View File

@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test

View File

@ -137,6 +137,8 @@
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
#define HV_CPU_MANAGEMENT \
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
#define HV_ENABLE_EXTENDED_HYPERCALLS \
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
#define HV_ISOLATION \
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
@ -213,6 +215,9 @@
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
/* Extended hypercalls */
#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
#define HV_FLUSH_ALL_PROCESSORS BIT(0)
#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)

View File

@ -1844,6 +1844,7 @@ static struct exit_reason {
{KVM_EXIT_X86_RDMSR, "RDMSR"},
{KVM_EXIT_X86_WRMSR, "WRMSR"},
{KVM_EXIT_XEN, "XEN"},
{KVM_EXIT_HYPERV, "HYPERV"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif

View File

@ -104,7 +104,7 @@ static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_
/* Set Guest OS id to enable Hyper-V emulation */
GUEST_SYNC(1);
wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
GUEST_SYNC(2);
check_tsc_msr_rdtsc();

View File

@ -0,0 +1,97 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
* exit to userspace and receive result in guest.
*
* Negative tests are present in hyperv_features.c
*
* Copyright 2022 Google LLC
* Author: Vipin Sharma <vipinsh@google.com>
*/
#include "kvm_util.h"
#include "processor.h"
#include "hyperv.h"
/* Any value is fine */
#define EXT_CAPABILITIES 0xbull
/*
 * Guest side of the test.
 *
 * @in_pg_gpa:  guest-physical address of the hypercall input page (also
 *              written to HV_X64_MSR_HYPERCALL)
 * @out_pg_gpa: guest-physical address of the hypercall output page
 * @out_pg_gva: guest-virtual address of that same output page, used to read
 *              back the result
 *
 * Issues HV_EXT_CALL_QUERY_CAPABILITIES and checks that the output page
 * holds EXT_CAPABILITIES afterwards.
 */
static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
		       vm_vaddr_t out_pg_gva)
{
	uint64_t *output_gva = (uint64_t *)out_pg_gva;

	/* Set the guest OS ID and the hypercall MSR before making the call. */
	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
	wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);

	hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);

	/* TLFS states output will be a uint64_t value */
	GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);

	GUEST_DONE();
}
int main(void)
{
vm_vaddr_t hcall_out_page;
vm_vaddr_t hcall_in_page;
struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
uint64_t *outval;
struct ucall uc;
/* Verify if extended hypercalls are supported */
if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
HV_ENABLE_EXTENDED_HYPERCALLS)) {
print_skip("Extended calls not supported by the kernel");
exit(KSFT_SKIP);
}
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
run = vcpu->run;
vcpu_set_hv_cpuid(vcpu);
/* Hypercall input */
hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
/* Hypercall output */
hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
"Unexpected exit reason: %u (%s)",
run->exit_reason, exit_reason_str(run->exit_reason));
outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
*outval = EXT_CAPABILITIES;
run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s)",
run->exit_reason, exit_reason_str(run->exit_reason));
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld");
break;
case UCALL_DONE:
break;
default:
TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
}
kvm_vm_free(vm);
return 0;
}

View File

@ -649,6 +649,15 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_SUCCESS;
break;
case 19:
hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
hcall->expect = HV_STATUS_ACCESS_DENIED;
break;
case 20:
vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
hcall->expect = HV_STATUS_INVALID_PARAMETER;
break;
case 21:
kvm_vm_free(vm);
return;
}