mirror of
https://github.com/torvalds/linux.git
synced 2026-06-01 11:03:43 +02:00
KVM x86 fixes for 7.1-rcN
- Include the kernel's linux/mman.h in KVM selftests to ensure MADV_COLLAPSE
is defined, as older libc versions may not provide it.
- Include execinfo.h if and only if KVM selftests are building against glibc,
and provide a test_dump_stack() for non-glibc builds.
- Fudge around an RCU splat in the emergency reboot code that is technically
a legitimate flaw, but in practice is a non-issue and fixing the flaw, e.g.
by adding locking, would incur meaningful risk, i.e. do more harm than good.
- Rate-limit global clock updates once again (but without delayed work), as
KVM was subtly relying on the old rate-limiting for NTP correction to guard
against "update storms" when running without a master clock on systems with
overcommitted CPUs.
- Fix a brown paper bag goof where KVM checked if ERAPS is "dirty" instead of
marking it dirty when emulating INVPCID.
- Flush the TLB when transitioning from xAVIC => x2AVIC to ensure the CPU TLB
doesn't contain AVIC-tagged entries for the APIC base GPA.
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmoZtdgACgkQOlYIJqCj
N/26sw/+IWOA5AxyoNW/lKAhhkzDTzGWrNCQkpMv+F4tOUbHYniTxI/pv4L3eMvf
ZLUXijYxhpJtnblLtrnPpSFl5tll4xQdMUv7+fljgpYmy6+erQHodtCgRi5wHDbM
NlD7DWOgwmpvzYLcybq1RfjZ3n+OBRvq95haQ6Ph4FtoYuIomtJ5tF2mnMlyxlc/
aIK5wzQ/JeYdQxwwz1ctlHkgE5bPnS+Sxr33+MRFQ5cIpuwdoS9zYRITNBM107kg
bLeei8Cxh91sgEidgwS8JToLvaEQH8AodkROjcScllwUxYsshPKsHeH7sTMbCOVd
DiH9VbheZo7d4kb6pvhGsY891ec00dR5E/l2gZYLWHg4v0lINTw6uBdoJuq3t2TO
Q3KmGVaUWz+c6dY/0qntVpws35zG106S8Pp4mx/1EnUHbJKZYDsUMC1ppwhrr3Pz
WEyQ9PFXhOyoSbrtOaEfU+wsFPeAfT9eYADu7oV1t7l75TJAKW1EEaSGfzOO/crj
3GK3vRq2B1cMHX9c4fwhSs4h8k5JvKlI/mtGPxZN3khVorx9dv/rTqOoeQEsFS5+
8s5XcNPPJlKfNXcu3Jq6rn8U/JA2HnbH298Nk5uXTCfTrZtDgbOnI8YVYWnoadOl
8xJoie5ccEsysVj1npNNh61LNMF1XBUUC+eNn0I1o0NzeRauxF8=
=QQUn
-----END PGP SIGNATURE-----
Merge tag 'kvm-x86-fixes-7.1-rc6' of https://github.com/kvm-x86/linux into HEAD
KVM x86 fixes for 7.1-rcN
- Include the kernel's linux/mman.h in KVM selftests to ensure MADV_COLLAPSE
is defined, as older libc versions may not provide it.
- Include execinfo.h if and only if KVM selftests are building against glibc,
and provide a test_dump_stack() for non-glibc builds.
- Fudge around an RCU splat in the emergency reboot code that is technically
a legitimate flaw, but in practice is a non-issue and fixing the flaw, e.g.
by adding locking, would incur meaningful risk, i.e. do more harm than good.
- Rate-limit global clock updates once again (but without delayed work), as
KVM was subtly relying on the old rate-limiting for NTP correction to guard
against "update storms" when running without a master clock on systems with
overcommitted CPUs.
- Fix a brown paper bag goof where KVM checked if ERAPS is "dirty" instead of
marking it dirty when emulating INVPCID.
- Flush the TLB when transitioning from xAVIC => x2AVIC to ensure the CPU TLB
doesn't contain AVIC-tagged entries for the APIC base GPA.
This commit is contained in:
commit
b397897016
|
|
@ -1504,6 +1504,7 @@ struct kvm_arch {
|
|||
bool use_master_clock;
|
||||
u64 master_kernel_ns;
|
||||
u64 master_cycle_now;
|
||||
struct ratelimit_state kvmclock_update_rs;
|
||||
|
||||
#ifdef CONFIG_KVM_HYPERV
|
||||
struct kvm_hv hyperv;
|
||||
|
|
|
|||
|
|
@ -206,6 +206,35 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
|
|||
|
||||
svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
|
||||
|
||||
/*
|
||||
* Flush the TLB when enabling (x2)AVIC and when transitioning between
|
||||
* xAVIC and x2AVIC, as the CPU may have inserted a TLB entry for the
|
||||
* "wrong" mapping.
|
||||
*
|
||||
* KVM uses a per-VM "scratch" page to back the APIC memslot, because
|
||||
* KVM also uses per-VM page tables *and* maintains the page table (NPT
|
||||
* or shadow page) mappings for said memslot even if one or more vCPUs
|
||||
* have their local APIC hardware-disabled or are in x2APIC mode, i.e.
|
||||
* even if one or more vCPUs' APIC MMIO BAR is effectively disabled.
|
||||
*
|
||||
* If xAVIC is fully enabled, hardware ignores the physical address in
|
||||
* KVM's page tables, i.e. in the leaf SPTE for the APIC memslot, and
|
||||
* instead redirects the access to the AVIC backing page, i.e. to the
|
||||
* vCPU's virtual APIC page. If xAVIC is not enabled (APIC is either
|
||||
* hardware-disabled or in x2APIC mode), then guest accesses will use
|
||||
* the page table mapping verbatim, i.e. will access the per-VM scratch
|
||||
* page, as normal memory.
|
||||
*
|
||||
* In both cases, the CPU is allowed to cache TLB entries for the APIC
|
||||
* base GPA. So, KVM needs to flush the TLB when enabling xAVIC, as
|
||||
* accesses need to be redirected to the virtual APIC page, but the TLB
|
||||
* may contain entries pointing at the scratch page. KVM also needs to
|
||||
* flush the TLB when enabling x2AVIC, as accesses need to go to the
|
||||
* scratch page, but the TLB may contain entries tagged as xAVIC, i.e.
|
||||
* entries pointing to the vCPU's virtual APIC page.
|
||||
*/
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
|
||||
|
||||
/*
|
||||
* Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
|
||||
* accesses, while interrupt injection to a running vCPU can be
|
||||
|
|
@ -219,12 +248,6 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
|
|||
/* Disabling MSR intercept for x2APIC registers */
|
||||
avic_set_x2apic_msr_interception(svm, false);
|
||||
} else {
|
||||
/*
|
||||
* Flush the TLB, the guest may have inserted a non-APIC
|
||||
* mapping into the TLB while AVIC was disabled.
|
||||
*/
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
|
||||
|
||||
/* Enabling MSR intercept for x2APIC registers */
|
||||
avic_set_x2apic_msr_interception(svm, true);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5227,8 +5227,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
* On a host with synchronized TSC, there is no need to update
|
||||
* kvmclock on vcpu->cpu migration
|
||||
*/
|
||||
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
|
||||
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
|
||||
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) {
|
||||
if (__ratelimit(&vcpu->kvm->arch.kvmclock_update_rs))
|
||||
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
|
||||
else
|
||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||
}
|
||||
|
||||
if (vcpu->cpu != cpu)
|
||||
kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
|
||||
vcpu->cpu = cpu;
|
||||
|
|
@ -13366,6 +13371,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
|
||||
mutex_init(&kvm->arch.apic_map_lock);
|
||||
seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock);
|
||||
ratelimit_state_init(&kvm->arch.kvmclock_update_rs, HZ, 10);
|
||||
ratelimit_set_flags(&kvm->arch.kvmclock_update_rs, RATELIMIT_MSG_ON_RELEASE);
|
||||
kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
|
||||
|
||||
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
|
||||
|
|
@ -14323,7 +14330,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
|
|||
* the RAP (Return Address Predictor).
|
||||
*/
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
|
||||
kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS);
|
||||
kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS);
|
||||
|
||||
kvm_invalidate_pcid(vcpu, operand.pcid);
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,20 @@ static void x86_virt_invoke_kvm_emergency_callback(void)
|
|||
{
|
||||
cpu_emergency_virt_cb *kvm_callback;
|
||||
|
||||
kvm_callback = rcu_dereference(kvm_emergency_callback);
|
||||
/*
|
||||
* RCU may not be watching the crashing CPU here, so rcu_dereference()
|
||||
* triggers a suspicious-RCU-usage splat. In principle, a concurrent
|
||||
* KVM module unload could race with this read; see commit 2baa33a8ddd6
|
||||
* ("KVM: x86: Leave user-return notifier registered on reboot/shutdown")
|
||||
* which notes that nothing prevents module unload during panic/reboot.
|
||||
*
|
||||
* However, taking a lock here would be riskier than the current race:
|
||||
* the system is going down via NMI shootdown, and any lock could be
|
||||
* held by an already-stopped CPU. Use rcu_dereference_raw() to silence
|
||||
* the lockdep splat and accept the comically small remaining race;
|
||||
* panic context inherently cannot guarantee complete correctness.
|
||||
*/
|
||||
kvm_callback = rcu_dereference_raw(kvm_emergency_callback);
|
||||
if (kvm_callback)
|
||||
kvm_callback();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,10 +41,10 @@
|
|||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "kvm_syscalls.h"
|
||||
#include "kvm_util.h"
|
||||
#include "test_util.h"
|
||||
#include "memstress.h"
|
||||
|
|
|
|||
|
|
@ -14,10 +14,10 @@
|
|||
#include <linux/bitmap.h>
|
||||
#include <linux/falloc.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "kvm_syscalls.h"
|
||||
#include "kvm_util.h"
|
||||
#include "numaif.h"
|
||||
#include "test_util.h"
|
||||
|
|
|
|||
|
|
@ -2,8 +2,18 @@
|
|||
#ifndef SELFTEST_KVM_SYSCALLS_H
|
||||
#define SELFTEST_KVM_SYSCALLS_H
|
||||
|
||||
/*
|
||||
* Include both the kernel and libc versions of mman.h. The kernel provides
|
||||
* the most up-to-date flags and definitions, while libc provides the syscall
|
||||
* wrappers tests expect.
|
||||
*/
|
||||
#include <linux/mman.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include <test_util.h>
|
||||
|
||||
#define MAP_ARGS0(m,...)
|
||||
#define MAP_ARGS1(m,t,a,...) m(t,a)
|
||||
#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__)
|
||||
|
|
|
|||
|
|
@ -19,9 +19,9 @@
|
|||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include "kselftest.h"
|
||||
|
||||
#include <linux/mman.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define msecs_to_usecs(msec) ((msec) * 1000ULL)
|
||||
|
|
|
|||
|
|
@ -6,11 +6,14 @@
|
|||
*/
|
||||
#include "test_util.h"
|
||||
|
||||
#include <execinfo.h>
|
||||
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "kselftest.h"
|
||||
|
||||
#ifdef __GLIBC__
|
||||
#include <execinfo.h>
|
||||
|
||||
/* Dumps the current stack trace to stderr. */
|
||||
static void __attribute__((noinline)) test_dump_stack(void);
|
||||
static void test_dump_stack(void)
|
||||
|
|
@ -57,6 +60,9 @@ static void test_dump_stack(void)
|
|||
system(cmd);
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
#else
|
||||
static void test_dump_stack(void) {}
|
||||
#endif
|
||||
|
||||
static pid_t _gettid(void)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -5,13 +5,13 @@
|
|||
* Copyright (C) 2018, Google LLC.
|
||||
*/
|
||||
#include "test_util.h"
|
||||
#include "kvm_syscalls.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "ucall_common.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <sched.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
|
@ -23,6 +22,7 @@
|
|||
#include <linux/sizes.h>
|
||||
|
||||
#include <test_util.h>
|
||||
#include <kvm_syscalls.h>
|
||||
#include <kvm_util.h>
|
||||
#include <processor.h>
|
||||
#include <ucall_common.h>
|
||||
|
|
|
|||
|
|
@ -4,11 +4,10 @@
|
|||
*
|
||||
* Copyright (C) 2024, Red Hat, Inc.
|
||||
*/
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_syscalls.h"
|
||||
#include "kvm_util.h"
|
||||
#include "kselftest.h"
|
||||
#include "ucall_common.h"
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@
|
|||
*
|
||||
* Copyright IBM Corp. 2021
|
||||
*/
|
||||
#include <sys/mman.h>
|
||||
#include "test_util.h"
|
||||
#include "kvm_syscalls.h"
|
||||
#include "kvm_util.h"
|
||||
#include "kselftest.h"
|
||||
#include "ucall_common.h"
|
||||
|
|
|
|||
|
|
@ -8,11 +8,11 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#include <test_util.h>
|
||||
#include <kvm_syscalls.h>
|
||||
#include <kvm_util.h>
|
||||
#include <processor.h>
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user