KVM Xen changes for 6.15

- Don't write to the Xen hypercall page on MSR writes that are initiated by
    the host (userspace or KVM) to fix a class of bugs where KVM can write to
    guest memory at unexpected times, e.g. during vCPU creation if userspace has
    set the Xen hypercall MSR index to collide with an MSR that KVM emulates.
 
  - Restrict the Xen hypercall MSR indx to the unofficial synthetic range to
    reduce the set of possible collisions with MSRs that are emulated by KVM
    (collisions can still happen as KVM emulates Hyper-V MSRs, which also reside
    in the synthetic range).
 
  - Clean up and optimize KVM's handling of Xen MSR writes and xen_hvm_config.
 
  - Update Xen TSC leaves during CPUID emulation instead of modifying the CPUID
    entries when updating PV clocks, as there is no guarantee PV clocks will be
    updated between TSC frequency changes and CPUID emulation, and guest reads
    of Xen TSC should be rare, i.e. are not a hot path.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmfZpO4ACgkQOlYIJqCj
 N/3AMQ/+J4+yOslekq4DHYhZaTvJFf0MqhPgTuf2s6I5p449JWn9rebqK2w0M9Xj
 fJy7/rboQA4QflBuhTiWcC3Dl1lYtxUqqtcCH9608eqKhbeay87OfV0/vgMwWBRs
 FhcOcp1587esJj5gz5M5R9i3S5Yq7Q4fp1+DmS23X41Zz5nTb2q80MY5UklMgI9I
 Ydaw1liB8rRHWbdt9yM4UsI8k4fMuj0PE8pEapoTSfsZm8J4cG9qHKrvuWjuFSCF
 l18Hyl11nWq8eZ5Vg2E2UIz0EgtWIHKu1/fi4av20/JTuA8Mon15WC5q4BBmDDdD
 keR9OJLYclVBh8KweiJSTUE6PcD9A8pWmoWyp6aGRiyyUVhbwysYTzT7uytwQz6w
 RH/vVHe0o/m19SnD9rqsRVObc7dOGorFXScMcf4Qxoq9yQm2p0lJDvq6c9uECLMV
 RIfZrXe9HS67RB9INybS+1fVlLcd0bLgGfG7q9lWLEABD45HpM5daQ4Mlf8+MIE0
 V7egx9t69/WALbJka8pWNISeFRKkB1LRjite+XXasqJ0iFeneM8UKFVB4OMtXL9g
 M0m8ovvySySMkoCq3yMlKxXh4rJ1/D556/bAaJBukMPWFWX9FQaP33U3FuzId7jH
 ztZVugViQMNiIbQVgUSAcgpuJvgpttAciACODlaw2u2Bk1Txmn0=
 =c3Wt
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-xen-6.15' of https://github.com/kvm-x86/linux into HEAD

KVM Xen changes for 6.15

 - Don't write to the Xen hypercall page on MSR writes that are initiated by
   the host (userspace or KVM) to fix a class of bugs where KVM can write to
   guest memory at unexpected times, e.g. during vCPU creation if userspace has
   set the Xen hypercall MSR index to collide with an MSR that KVM emulates.

 - Restrict the Xen hypercall MSR indx to the unofficial synthetic range to
   reduce the set of possible collisions with MSRs that are emulated by KVM
   (collisions can still happen as KVM emulates Hyper-V MSRs, which also reside
   in the synthetic range).

 - Clean up and optimize KVM's handling of Xen MSR writes and xen_hvm_config.

 - Update Xen TSC leaves during CPUID emulation instead of modifying the CPUID
   entries when updating PV clocks, as there is no guarantee PV clocks will be
   updated between TSC frequency changes and CPUID emulation, and guest reads
   of Xen TSC should be rare, i.e. are not a hot path.
This commit is contained in:
Paolo Bonzini 2025-03-19 09:14:59 -04:00
commit 3ecf162a31
8 changed files with 80 additions and 43 deletions

View File

@ -1000,6 +1000,10 @@ blobs in userspace. When the guest writes the MSR, kvm copies one
page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
memory.
The MSR index must be in the range [0x40000000, 0x4fffffff], i.e. must reside
in the range that is unofficially reserved for use by hypervisors. The min/max
values are enumerated via KVM_XEN_MSR_MIN_INDEX and KVM_XEN_MSR_MAX_INDEX.
::
struct kvm_xen_hvm_config {

View File

@ -1192,6 +1192,8 @@ struct kvm_xen {
struct gfn_to_pfn_cache shinfo_cache;
struct idr evtchn_ports;
unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
struct kvm_xen_hvm_config hvm_config;
};
#endif
@ -1412,8 +1414,6 @@ struct kvm_arch {
struct delayed_work kvmclock_update_work;
struct delayed_work kvmclock_sync_work;
struct kvm_xen_hvm_config xen_hvm_config;
/* reads protected by irq_srcu, writes by irq_lock */
struct hlist_head mask_notifier_list;

View File

@ -559,6 +559,9 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;

View File

@ -2008,6 +2008,22 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
} else if (function == 0x80000007) {
if (kvm_hv_invtsc_suppressed(vcpu))
*edx &= ~feature_bit(CONSTANT_TSC);
} else if (IS_ENABLED(CONFIG_KVM_XEN) &&
kvm_xen_is_tsc_leaf(vcpu, function)) {
/*
* Update guest TSC frequency information if necessary.
* Ignore failures, there is no sane value that can be
* provided if KVM can't get the TSC frequency.
*/
if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu))
kvm_guest_time_update(vcpu);
if (index == 1) {
*ecx = vcpu->arch.pvclock_tsc_mul;
*edx = vcpu->arch.pvclock_tsc_shift;
} else if (index == 2) {
*eax = vcpu->arch.hw_tsc_khz;
}
}
} else {
*eax = *ebx = *ecx = *edx = 0;

View File

@ -3169,7 +3169,7 @@ static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock,
trace_kvm_pvclock_update(vcpu->vcpu_id, &hv_clock);
}
static int kvm_guest_time_update(struct kvm_vcpu *v)
int kvm_guest_time_update(struct kvm_vcpu *v)
{
struct pvclock_vcpu_time_info hv_clock = {};
unsigned long flags, tgt_tsc_khz;
@ -3242,7 +3242,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
&vcpu->pvclock_tsc_shift,
&vcpu->pvclock_tsc_mul);
vcpu->hw_tsc_khz = tgt_tsc_khz;
kvm_xen_update_tsc_info(v);
}
hv_clock.tsc_shift = vcpu->pvclock_tsc_shift;
@ -3282,7 +3281,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
*
* Note! Clear TSC_STABLE only for Xen clocks, i.e. the order matters!
*/
if (ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
if (ka->xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
hv_clock.flags &= ~PVCLOCK_TSC_STABLE_BIT;
if (vcpu->xen.vcpu_info_cache.active)
@ -3745,7 +3744,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u32 msr = msr_info->index;
u64 data = msr_info->data;
if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
/*
* Do not allow host-initiated writes to trigger the Xen hypercall
* page setup; it could incur locking paths which are not expected
* if userspace sets the MSR in an unusual location.
*/
if (kvm_xen_is_hypercall_page_msr(vcpu->kvm, msr) &&
!msr_info->host_initiated)
return kvm_xen_write_hypercall_page(vcpu, data);
switch (msr) {

View File

@ -369,6 +369,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
u64 get_kvmclock_ns(struct kvm *kvm);
uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
int kvm_guest_time_update(struct kvm_vcpu *v);
int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
gva_t addr, void *val, unsigned int bytes,

View File

@ -1335,10 +1335,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
* Note, truncation is a non-issue as 'lm' is guaranteed to be
* false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
*/
hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
: kvm->arch.xen_hvm_config.blob_addr_32;
u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
: kvm->arch.xen_hvm_config.blob_size_32;
hva_t blob_addr = lm ? kvm->arch.xen.hvm_config.blob_addr_64
: kvm->arch.xen.hvm_config.blob_addr_32;
u8 blob_size = lm ? kvm->arch.xen.hvm_config.blob_size_64
: kvm->arch.xen.hvm_config.blob_size_32;
u8 *page;
int ret;
@ -1379,15 +1379,24 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
xhc->blob_size_32 || xhc->blob_size_64))
return -EINVAL;
/*
* Restrict the MSR to the range that is unofficially reserved for
* synthetic, virtualization-defined MSRs, e.g. to prevent confusing
* KVM by colliding with a real MSR that requires special handling.
*/
if (xhc->msr &&
(xhc->msr < KVM_XEN_MSR_MIN_INDEX || xhc->msr > KVM_XEN_MSR_MAX_INDEX))
return -EINVAL;
mutex_lock(&kvm->arch.xen.xen_lock);
if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
if (xhc->msr && !kvm->arch.xen.hvm_config.msr)
static_branch_inc(&kvm_xen_enabled.key);
else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
else if (!xhc->msr && kvm->arch.xen.hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
old_flags = kvm->arch.xen_hvm_config.flags;
memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
old_flags = kvm->arch.xen.hvm_config.flags;
memcpy(&kvm->arch.xen.hvm_config, xhc, sizeof(*xhc));
mutex_unlock(&kvm->arch.xen.xen_lock);
@ -1468,7 +1477,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
int i;
if (!lapic_in_kernel(vcpu) ||
!(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
!(vcpu->kvm->arch.xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
return false;
if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
@ -2302,29 +2311,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *entry;
u32 function;
if (!vcpu->arch.xen.cpuid.base)
return;
function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3);
if (function > vcpu->arch.xen.cpuid.limit)
return;
entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
if (entry) {
entry->ecx = vcpu->arch.pvclock_tsc_mul;
entry->edx = vcpu->arch.pvclock_tsc_shift;
}
entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
if (entry)
entry->eax = vcpu->arch.hw_tsc_khz;
}
void kvm_xen_init_vm(struct kvm *kvm)
{
mutex_init(&kvm->arch.xen.xen_lock);
@ -2346,6 +2332,6 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
}
idr_destroy(&kvm->arch.xen.evtchn_ports);
if (kvm->arch.xen_hvm_config.msr)
if (kvm->arch.xen.hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
}

View File

@ -9,6 +9,7 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
#ifdef CONFIG_KVM_XEN
@ -35,7 +36,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
{
@ -50,16 +50,32 @@ static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
kvm_xen_inject_vcpu_vector(vcpu);
}
static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
vcpu->arch.xen.cpuid.base &&
function <= vcpu->arch.xen.cpuid.limit &&
function == (vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3));
}
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
kvm->arch.xen_hvm_config.msr;
kvm->arch.xen.hvm_config.msr;
}
static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
{
if (!static_branch_unlikely(&kvm_xen_enabled.key))
return false;
return msr && msr == kvm->arch.xen.hvm_config.msr;
}
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
(kvm->arch.xen_hvm_config.flags &
(kvm->arch.xen.hvm_config.flags &
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
}
@ -124,6 +140,11 @@ static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
return false;
}
static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
{
return false;
}
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
{
return false;
@ -157,8 +178,9 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
return false;
}
static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
static inline bool kvm_xen_is_tsc_leaf(struct kvm_vcpu *vcpu, u32 function)
{
return false;
}
#endif