KVM x86 misc changes for 6.15:

- Fix a bug in PIC emulation that caused KVM to emit a spurious KVM_REQ_EVENT.
 
  - Add a helper to consolidate handling of mp_state transitions, and use it to
    clear pv_unhalted whenever a vCPU is made RUNNABLE.
 
  - Defer runtime CPUID updates until KVM emulates a CPUID instruction, to
    coalesce updates when multiple pieces of vCPU state are changing, e.g. as
    part of a nested transition.
 
  - Fix a variety of nested emulation bugs, and add VMX support for synthesizing
    nested VM-Exit on interception (instead of injecting #UD into L2).
 
  - Drop "support" for PV Async #PF with proctected guests without SEND_ALWAYS,
    as KVM can't get the current CPL.
 
  - Misc cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmfZhoYACgkQOlYIJqCj
 N/1oSBAAlhsZzv4m6rHtWACjGsSBlAE5WM7HrpnwjOyMW+Desc0WLL4L6qAlxgT7
 4ZkBvQ4zsiyUIdLv1XARvkBYHTUkcgG8fQSSJQ6grZit5OcFcMgWafvQrJYI6428
 TyzF/x6t0gj8CjDluA6jM/eL5HhWZZDOIg0Wma+pyYpW+7y2tkphXyYyOQPBGwv3
 geRUetbDHHXjf042k/8f1j1vjzrNNvAg3YyNyx1YbdU9XKsn5D+SeUW2eVfYk8G7
 5QsCOGvUYcbbjrR8kbCZKexvoH6Np9J6YKDe4R9R2yDzgs/96qz6xkYTGCVkHA1y
 uursKqRHgbXBxzxa+ban073laT7Qt3S01Gd9bJW3IO7hzG89gl4qfX7fap8T9Yc2
 yeBTYIgInpyx+NCdZ2Z/++BzPagBGfa77gFX/eIkmsVA9LWYi9CI3FSjtr/czvWm
 a4tfMPvTVBjsBQQ7t/lNksrq0O51lbb3iqqv3ToQpDOOqCWuMEU5xcihhPRr5NSZ
 dX4o/jIDhCV8EyXdtASyqMlYBXcuC45ojEZn1elh0QogzYAdSGQ2bIDyxuBtA//k
 kSbi+E4GB64jVfBWUyK2QeLOBnBkH7mh6Cg5UYr1Ln9Sm6l8vrcxhcbnchiWxXMI
 WCK7BJwI2HojBVpEZ04jMkjHvg36uSfjOzmMLT5yPXfFNebsGmA=
 =8SGF
 -----END PGP SIGNATURE-----

Merge tag 'kvm-x86-misc-6.15' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.15:

 - Fix a bug in PIC emulation that caused KVM to emit a spurious KVM_REQ_EVENT.

 - Add a helper to consolidate handling of mp_state transitions, and use it to
   clear pv_unhalted whenever a vCPU is made RUNNABLE.

 - Defer runtime CPUID updates until KVM emulates a CPUID instruction, to
   coalesce updates when multiple pieces of vCPU state are changing, e.g. as
   part of a nested transition.

 - Fix a variety of nested emulation bugs, and add VMX support for synthesizing
   nested VM-Exit on interception (instead of injecting #UD into L2).

 - Drop "support" for PV Async #PF with proctected guests without SEND_ALWAYS,
   as KVM can't get the current CPL.

 - Misc cleanups
This commit is contained in:
Paolo Bonzini 2025-03-19 09:04:48 -04:00
commit 4d9a677596
20 changed files with 416 additions and 181 deletions

View File

@ -881,6 +881,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
bool cpuid_dynamic_bits_dirty;
bool is_amd_compatible;
/*
@ -998,8 +999,8 @@ struct kvm_vcpu_arch {
u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
u16 vec;
u32 id;
bool send_user_only;
u32 host_apf_flags;
bool send_always;
bool delivery_as_pf_vmexit;
bool pageready_pending;
} apf;
@ -1355,8 +1356,6 @@ struct kvm_arch {
u64 shadow_mmio_value;
struct iommu_domain *iommu_domain;
bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
@ -2166,8 +2165,8 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
bool has_error_code, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault);

View File

@ -58,25 +58,24 @@ void __init kvm_init_xstate_sizes(void)
u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
int i;
xstate_bv &= XFEATURE_MASK_EXTEND;
while (xstate_bv) {
if (xstate_bv & 0x1) {
struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit];
u32 offset;
for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes) && xstate_bv; i++) {
struct cpuid_xstate_sizes *xs = &xstate_sizes[i];
u32 offset;
/* ECX[1]: 64B alignment in compacted form */
if (compacted)
offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
else
offset = xs->ebx;
ret = max(ret, offset + xs->eax);
}
if (!(xstate_bv & BIT_ULL(i)))
continue;
xstate_bv >>= 1;
feature_bit++;
/* ECX[1]: 64B alignment in compacted form */
if (compacted)
offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret;
else
offset = xs->ebx;
ret = max(ret, offset + xs->eax);
xstate_bv &= ~BIT_ULL(i);
}
return ret;
@ -196,6 +195,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
}
static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu);
static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
@ -300,10 +300,12 @@ static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
guest_cpu_cap_change(vcpu, x86_feature, has_feature);
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
static void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
vcpu->arch.cpuid_dynamic_bits_dirty = false;
best = kvm_find_cpuid_entry(vcpu, 1);
if (best) {
kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE,
@ -333,7 +335,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
}
EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
{
@ -646,6 +647,9 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
if (cpuid->nent < vcpu->arch.cpuid_nent)
return -E2BIG;
if (vcpu->arch.cpuid_dynamic_bits_dirty)
kvm_update_cpuid_runtime(vcpu);
if (copy_to_user(entries, vcpu->arch.cpuid_entries,
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
return -EFAULT;
@ -1704,7 +1708,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
phys_as = entry->eax & 0xff;
g_phys_as = phys_as;
if (kvm_mmu_get_max_tdp_level() < 5)
g_phys_as = min(g_phys_as, 48);
g_phys_as = min(g_phys_as, 48U);
}
entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
@ -1769,13 +1773,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
cpuid_entry_override(entry, CPUID_8000_0022_EAX);
if (kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
else if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE))
ebx.split.num_core_pmc = AMD64_NUM_COUNTERS_CORE;
else
ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
ebx.split.num_core_pmc = kvm_pmu_cap.num_counters_gp;
entry->ebx = ebx.full;
break;
}
@ -1985,6 +1983,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
struct kvm_cpuid_entry2 *entry;
bool exact, used_max_basic = false;
if (vcpu->arch.cpuid_dynamic_bits_dirty)
kvm_update_cpuid_runtime(vcpu);
entry = kvm_find_cpuid_entry_index(vcpu, function, index);
exact = !!entry;
@ -2000,7 +2001,8 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
*edx = entry->edx;
if (function == 7 && index == 0) {
u64 data;
if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) &&
!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
(data & TSX_CTRL_CPUID_CLEAR))
*ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
} else if (function == 0x80000007) {

View File

@ -11,7 +11,6 @@ extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
void kvm_set_cpu_caps(void);
void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
u32 function, u32 index);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@ -232,6 +231,14 @@ static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
/*
* Except for MWAIT, querying dynamic feature bits is disallowed, so
* that KVM can defer runtime updates until the next CPUID emulation.
*/
BUILD_BUG_ON(x86_feature == X86_FEATURE_APIC ||
x86_feature == X86_FEATURE_OSXSAVE ||
x86_feature == X86_FEATURE_OSPKE);
return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}

View File

@ -477,8 +477,11 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
.dst_val = ctxt->dst.val64,
.src_bytes = ctxt->src.bytes,
.dst_bytes = ctxt->dst.bytes,
.src_type = ctxt->src.type,
.dst_type = ctxt->dst.type,
.ad_bytes = ctxt->ad_bytes,
.next_rip = ctxt->eip,
.rip = ctxt->eip,
.next_rip = ctxt->_eip,
};
return ctxt->ops->intercept(ctxt, &info, stage);

View File

@ -567,7 +567,7 @@ static void pic_irq_request(struct kvm *kvm, int level)
{
struct kvm_pic *s = kvm->arch.vpic;
if (!s->output)
if (!s->output && level)
s->wakeup_needed = true;
s->output = level;
}

View File

@ -44,7 +44,10 @@ struct x86_instruction_info {
u64 dst_val; /* value of destination operand */
u8 src_bytes; /* size of source operand */
u8 dst_bytes; /* size of destination operand */
u8 src_type; /* type of source operand */
u8 dst_type; /* type of destination operand */
u8 ad_bytes; /* size of src/dst address */
u64 rip; /* rip of the instruction */
u64 next_rip; /* rip following the instruction */
};
@ -272,8 +275,10 @@ struct operand {
};
};
#define X86_MAX_INSTRUCTION_LENGTH 15
struct fetch_cache {
u8 data[15];
u8 data[X86_MAX_INSTRUCTION_LENGTH];
u8 *ptr;
u8 *end;
};

View File

@ -221,13 +221,6 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
}
}
static void kvm_apic_map_free(struct rcu_head *rcu)
{
struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
kvfree(map);
}
static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
struct kvm_vcpu *vcpu,
bool *xapic_id_mismatch)
@ -489,7 +482,7 @@ static void kvm_recalculate_apic_map(struct kvm *kvm)
mutex_unlock(&kvm->arch.apic_map_lock);
if (old)
call_rcu(&old->rcu, kvm_apic_map_free);
kvfree_rcu(old, rcu);
kvm_make_scan_ioapic_request(kvm);
}
@ -2593,7 +2586,7 @@ static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
if (!apic)
return;
@ -3397,9 +3390,9 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
else
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
}
if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@ -3408,7 +3401,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
sipi_vector = apic->sipi_vector;
kvm_x86_call(vcpu_deliver_sipi_vector)(vcpu,
sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
}
}
return 0;

View File

@ -358,7 +358,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
goto error;
#endif
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
kvm_mmu_reset_context(vcpu);
return;
error:

View File

@ -994,7 +994,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
/* in case we halted in L2 */
svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
/* Give the current vmcb to the guest */

View File

@ -3271,7 +3271,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
if (kvm_ghcb_xcr0_is_valid(svm)) {
vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
}
/* Copy the GHCB exit information into the VMCB fields */
@ -3855,7 +3855,7 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
/* Mark the vCPU as offline and not runnable */
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED);
/* Clear use of the VMSA */
svm->vmcb->control.vmsa_pa = INVALID_PAGE;
@ -3893,8 +3893,7 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
svm->vmcb->control.vmsa_pa = pfn_to_hpa(pfn);
/* Mark the vCPU as runnable */
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
svm->sev_es.snp_vmsa_gpa = INVALID_PAGE;

View File

@ -1932,7 +1932,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
}
static void svm_set_segment(struct kvm_vcpu *vcpu,
@ -4137,20 +4137,23 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_injected = true;
svm->nmi_l1_to_l2 = nmi_l1_to_l2;
break;
case SVM_EXITINTINFO_TYPE_EXEPT:
case SVM_EXITINTINFO_TYPE_EXEPT: {
u32 error_code = 0;
/*
* Never re-inject a #VC exception.
*/
if (vector == X86_TRAP_VC)
break;
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
u32 err = svm->vmcb->control.exit_int_info_err;
kvm_requeue_exception_e(vcpu, vector, err);
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR)
error_code = svm->vmcb->control.exit_int_info_err;
} else
kvm_requeue_exception(vcpu, vector);
kvm_requeue_exception(vcpu, vector,
exitintinfo & SVM_EXITINTINFO_VALID_ERR,
error_code);
break;
}
case SVM_EXITINTINFO_TYPE_INTR:
kvm_queue_interrupt(vcpu, vector, false);
break;

View File

@ -830,12 +830,12 @@ TRACE_EVENT(kvm_emulate_insn,
TP_ARGS(vcpu, failed),
TP_STRUCT__entry(
__field( __u64, rip )
__field( __u32, csbase )
__field( __u8, len )
__array( __u8, insn, 15 )
__field( __u8, flags )
__field( __u8, failed )
__field( __u64, rip )
__field( __u32, csbase )
__field( __u8, len )
__array( __u8, insn, X86_MAX_INSTRUCTION_LENGTH )
__field( __u8, flags )
__field( __u8, failed )
),
TP_fast_assign(
@ -846,7 +846,7 @@ TRACE_EVENT(kvm_emulate_insn,
__entry->rip = vcpu->arch.emulate_ctxt->_eip - __entry->len;
memcpy(__entry->insn,
vcpu->arch.emulate_ctxt->fetch.data,
15);
X86_MAX_INSTRUCTION_LENGTH);
__entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt->mode);
__entry->failed = failed;
),

View File

@ -2970,7 +2970,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
case INTR_TYPE_SOFT_EXCEPTION:
case INTR_TYPE_SOFT_INTR:
case INTR_TYPE_PRIV_SW_EXCEPTION:
if (CC(vmcs12->vm_entry_instruction_len > 15) ||
if (CC(vmcs12->vm_entry_instruction_len > X86_MAX_INSTRUCTION_LENGTH) ||
CC(vmcs12->vm_entry_instruction_len == 0 &&
CC(!nested_cpu_has_zero_length_injection(vcpu))))
return -EINVAL;
@ -3771,7 +3771,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
break;
case GUEST_ACTIVITY_WAIT_SIPI:
vmx->nested.nested_run_pending = 0;
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
break;
default:
break;
@ -4618,7 +4618,7 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
*/
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 vm_exit_reason, u32 exit_intr_info,
unsigned long exit_qualification)
unsigned long exit_qualification, u32 exit_insn_len)
{
/* update exit information fields: */
vmcs12->vm_exit_reason = vm_exit_reason;
@ -4646,7 +4646,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vm_exit_reason, exit_intr_info);
vmcs12->vm_exit_intr_info = exit_intr_info;
vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
vmcs12->vm_exit_instruction_len = exit_insn_len;
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
/*
@ -4930,8 +4930,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
* and modify vmcs12 to make it see what it would expect to see there if
* L2 was its real guest. Must only be called when in L2 (is_guest_mode())
*/
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
u32 exit_intr_info, unsigned long exit_qualification)
void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
u32 exit_intr_info, unsigned long exit_qualification,
u32 exit_insn_len)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@ -4981,7 +4982,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (vm_exit_reason != -1)
prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
exit_intr_info, exit_qualification);
exit_intr_info, exit_qualification,
exit_insn_len);
/*
* Must happen outside of sync_vmcs02_to_vmcs12() as it will
@ -5071,7 +5073,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmx->nested.need_vmcs12_to_shadow_sync = true;
/* in case we halted in L2 */
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
if (likely(!vmx->fail)) {
if (vm_exit_reason != -1)

View File

@ -26,8 +26,26 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry);
bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
u32 exit_intr_info, unsigned long exit_qualification);
void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
u32 exit_intr_info, unsigned long exit_qualification,
u32 exit_insn_len);
static inline void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
u32 exit_intr_info,
unsigned long exit_qualification)
{
u32 exit_insn_len;
if (to_vmx(vcpu)->fail || vm_exit_reason == -1 ||
(vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
exit_insn_len = 0;
else
exit_insn_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
__nested_vmx_vmexit(vcpu, vm_exit_reason, exit_intr_info,
exit_qualification, exit_insn_len);
}
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);

View File

@ -3519,7 +3519,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_writel(GUEST_CR4, hw_cr4);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
}
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
@ -7157,13 +7157,17 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
case INTR_TYPE_SOFT_EXCEPTION:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
fallthrough;
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
u32 err = vmcs_read32(error_code_field);
kvm_requeue_exception_e(vcpu, vector, err);
} else
kvm_requeue_exception(vcpu, vector);
case INTR_TYPE_HARD_EXCEPTION: {
u32 error_code = 0;
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(error_code_field);
kvm_requeue_exception(vcpu, vector,
idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK,
error_code);
break;
}
case INTR_TYPE_SOFT_INTR:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
fallthrough;
@ -8005,22 +8009,14 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info)
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
unsigned long *exit_qualification)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned short port;
bool intercept;
int size;
if (info->intercept == x86_intercept_in ||
info->intercept == x86_intercept_ins) {
port = info->src_val;
size = info->dst_bytes;
} else {
port = info->dst_val;
size = info->src_bytes;
}
bool imm;
/*
* If the 'use IO bitmaps' VM-execution control is 0, IO instruction
@ -8030,13 +8026,33 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
* Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
*/
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
intercept = nested_cpu_has(vmcs12,
CPU_BASED_UNCOND_IO_EXITING);
else
intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
if (info->intercept == x86_intercept_in ||
info->intercept == x86_intercept_ins) {
port = info->src_val;
size = info->dst_bytes;
imm = info->src_type == OP_IMM;
} else {
port = info->dst_val;
size = info->src_bytes;
imm = info->dst_type == OP_IMM;
}
*exit_qualification = ((unsigned long)port << 16) | (size - 1);
if (info->intercept == x86_intercept_ins ||
info->intercept == x86_intercept_outs)
*exit_qualification |= BIT(4);
if (info->rep_prefix)
*exit_qualification |= BIT(5);
if (imm)
*exit_qualification |= BIT(6);
return nested_vmx_check_io_bitmaps(vcpu, port, size);
}
int vmx_check_intercept(struct kvm_vcpu *vcpu,
@ -8045,26 +8061,34 @@ int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_exception *exception)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned long exit_qualification = 0;
u32 vm_exit_reason;
u64 exit_insn_len;
switch (info->intercept) {
/*
* RDPID causes #UD if disabled through secondary execution controls.
* Because it is marked as EmulateOnUD, we need to intercept it here.
* Note, RDPID is hidden behind ENABLE_RDTSCP.
*/
case x86_intercept_rdpid:
/*
* RDPID causes #UD if not enabled through secondary execution
* controls (ENABLE_RDTSCP). Note, the implicit MSR access to
* TSC_AUX is NOT subject to interception, i.e. checking only
* the dedicated execution control is architecturally correct.
*/
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
exception->vector = UD_VECTOR;
exception->error_code_valid = false;
return X86EMUL_PROPAGATE_FAULT;
}
break;
return X86EMUL_CONTINUE;
case x86_intercept_in:
case x86_intercept_ins:
case x86_intercept_out:
case x86_intercept_outs:
return vmx_check_intercept_io(vcpu, info);
if (!vmx_is_io_intercepted(vcpu, info, &exit_qualification))
return X86EMUL_CONTINUE;
vm_exit_reason = EXIT_REASON_IO_INSTRUCTION;
break;
case x86_intercept_lgdt:
case x86_intercept_lidt:
@ -8077,7 +8101,24 @@ int vmx_check_intercept(struct kvm_vcpu *vcpu,
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
return X86EMUL_CONTINUE;
/* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
if (info->intercept == x86_intercept_lldt ||
info->intercept == x86_intercept_ltr ||
info->intercept == x86_intercept_sldt ||
info->intercept == x86_intercept_str)
vm_exit_reason = EXIT_REASON_LDTR_TR;
else
vm_exit_reason = EXIT_REASON_GDTR_IDTR;
/*
* FIXME: Decode the ModR/M to generate the correct exit
* qualification for memory operands.
*/
break;
case x86_intercept_hlt:
if (!nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING))
return X86EMUL_CONTINUE;
vm_exit_reason = EXIT_REASON_HLT;
break;
case x86_intercept_pause:
@ -8090,17 +8131,24 @@ int vmx_check_intercept(struct kvm_vcpu *vcpu,
* the PAUSE.
*/
if ((info->rep_prefix != REPE_PREFIX) ||
!nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING))
!nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING))
return X86EMUL_CONTINUE;
vm_exit_reason = EXIT_REASON_PAUSE_INSTRUCTION;
break;
/* TODO: check more intercepts... */
default:
break;
return X86EMUL_UNHANDLEABLE;
}
return X86EMUL_UNHANDLEABLE;
exit_insn_len = abs_diff((s64)info->next_rip, (s64)info->rip);
if (!exit_insn_len || exit_insn_len > X86_MAX_INSTRUCTION_LENGTH)
return X86EMUL_UNHANDLEABLE;
__nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification,
exit_insn_len);
return X86EMUL_INTERCEPTED;
}
#ifdef CONFIG_X86_64

View File

@ -800,9 +800,9 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto
ex->payload = payload;
}
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned int nr,
bool has_error, u32 error_code,
bool has_payload, unsigned long payload)
{
u32 prev_nr;
int class1, class2;
@ -810,13 +810,10 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
/*
* If the exception is destined for L2 and isn't being reinjected,
* morph it to a VM-Exit if L1 wants to intercept the exception. A
* previously injected exception is not checked because it was checked
* when it was original queued, and re-checking is incorrect if _L1_
* injected the exception, in which case it's exempt from interception.
* If the exception is destined for L2, morph it to a VM-Exit if L1
* wants to intercept the exception.
*/
if (!reinject && is_guest_mode(vcpu) &&
if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
has_payload, payload);
@ -825,28 +822,9 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
queue:
if (reinject) {
/*
* On VM-Entry, an exception can be pending if and only
* if event injection was blocked by nested_run_pending.
* In that case, however, vcpu_enter_guest() requests an
* immediate exit, and the guest shouldn't proceed far
* enough to need reinjection.
*/
WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
vcpu->arch.exception.injected = true;
if (WARN_ON_ONCE(has_payload)) {
/*
* A reinjected event has already
* delivered its payload.
*/
has_payload = false;
payload = 0;
}
} else {
vcpu->arch.exception.pending = true;
vcpu->arch.exception.injected = false;
}
vcpu->arch.exception.pending = true;
vcpu->arch.exception.injected = false;
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.vector = nr;
vcpu->arch.exception.error_code = error_code;
@ -887,30 +865,53 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
kvm_multiple_exception(vcpu, nr, false, 0, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
unsigned long payload)
{
kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
kvm_multiple_exception(vcpu, nr, false, 0, true, payload);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, unsigned long payload)
{
kvm_multiple_exception(vcpu, nr, true, error_code,
true, payload, false);
kvm_multiple_exception(vcpu, nr, true, error_code, true, payload);
}
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
bool has_error_code, u32 error_code)
{
/*
* On VM-Entry, an exception can be pending if and only if event
* injection was blocked by nested_run_pending. In that case, however,
* vcpu_enter_guest() requests an immediate exit, and the guest
* shouldn't proceed far enough to need reinjection.
*/
WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
/*
* Do not check for interception when injecting an event for L2, as the
* exception was checked for intercept when it was original queued, and
* re-checking is incorrect if _L1_ injected the exception, in which
* case it's exempt from interception.
*/
kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.exception.injected = true;
vcpu->arch.exception.has_error_code = has_error_code;
vcpu->arch.exception.vector = nr;
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = false;
vcpu->arch.exception.payload = 0;
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
@ -980,16 +981,10 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu)
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
kvm_multiple_exception(vcpu, nr, true, error_code, false, 0);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
/*
* Checks if cpl <= required_cpl; if true, return true. Otherwise queue
* a #GP and return false.
@ -1264,7 +1259,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
return 0;
}
@ -2080,10 +2075,20 @@ EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
{
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT))
bool enabled;
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS))
goto emulate_as_nop;
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT);
else
enabled = vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT;
if (!enabled)
return kvm_handle_invalid_op(vcpu);
emulate_as_nop:
pr_warn_once("%s instruction emulated as NOP!\n", insn);
return kvm_emulate_as_nop(vcpu);
}
@ -3544,7 +3549,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
sizeof(u64)))
return 1;
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
vcpu->arch.apf.send_always = (data & KVM_ASYNC_PF_SEND_ALWAYS);
vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
kvm_async_pf_wakeup_all(vcpu);
@ -3889,7 +3894,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
} else {
vcpu->arch.ia32_misc_enable_msr = data;
}
@ -3924,7 +3929,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~kvm_caps.supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
kvm_update_cpuid_runtime(vcpu);
vcpu->arch.cpuid_dynamic_bits_dirty = true;
break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
@ -11220,9 +11225,7 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
case KVM_MP_STATE_AP_RESET_HOLD:
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
fallthrough;
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.apf.halted = false;
@ -11299,9 +11302,8 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
++vcpu->stat.halt_exits;
if (lapic_in_kernel(vcpu)) {
if (kvm_vcpu_has_events(vcpu))
vcpu->arch.pv.pv_unhalted = false;
else
vcpu->arch.mp_state = state;
state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, state);
return 1;
} else {
vcpu->run->exit_reason = reason;
@ -11821,10 +11823,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
goto out;
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
} else
vcpu->arch.mp_state = mp_state->mp_state;
kvm_set_mp_state(vcpu, mp_state->mp_state);
kvm_make_request(KVM_REQ_EVENT, vcpu);
ret = 0;
@ -11951,7 +11953,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
!is_protmode(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
return 0;
}
@ -12254,9 +12256,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm);
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
else
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
kvm_set_mp_state(vcpu, KVM_MP_STATE_UNINITIALIZED);
r = kvm_mmu_create(vcpu);
if (r < 0)
@ -13383,8 +13385,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
if (!kvm_pv_async_pf_enabled(vcpu))
return false;
if (vcpu->arch.apf.send_user_only &&
kvm_x86_call(get_cpl)(vcpu) == 0)
if (!vcpu->arch.apf.send_always &&
(vcpu->arch.guest_state_protected || !kvm_x86_call(get_cpl)(vcpu)))
return false;
if (is_guest_mode(vcpu)) {
@ -13474,7 +13476,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
}
vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
}
void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)

View File

@ -121,6 +121,13 @@ static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
return vcpu->arch.last_vmentry_cpu != -1;
}
static inline void kvm_set_mp_state(struct kvm_vcpu *vcpu, int mp_state)
{
vcpu->arch.mp_state = mp_state;
if (mp_state == KVM_MP_STATE_RUNNABLE)
vcpu->arch.pv.pv_unhalted = false;
}
static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending ||

View File

@ -1480,7 +1480,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);
if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED);
if (sched_poll.timeout)
mod_timer(&vcpu->arch.xen.poll_timer,
@ -1491,7 +1491,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
if (sched_poll.timeout)
del_timer(&vcpu->arch.xen.poll_timer);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
}
vcpu->arch.xen.poll_evtchn = 0;

View File

@ -69,6 +69,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test

View File

@ -0,0 +1,146 @@
// SPDX-License-Identifier: GPL-2.0-only
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#include "svm_util.h"
enum {
SVM_F,
VMX_F,
NR_VIRTUALIZATION_FLAVORS,
};
struct emulated_instruction {
const char name[32];
uint8_t opcode[15];
uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS];
};
static struct emulated_instruction instructions[] = {
{
.name = "pause",
.opcode = { 0xf3, 0x90 },
.exit_reason = { SVM_EXIT_PAUSE,
EXIT_REASON_PAUSE_INSTRUCTION, }
},
{
.name = "hlt",
.opcode = { 0xf4 },
.exit_reason = { SVM_EXIT_HLT,
EXIT_REASON_HLT, }
},
};
static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */
static uint8_t l2_guest_code[sizeof(kvm_fep) + 15];
static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
static uint32_t get_instruction_length(struct emulated_instruction *insn)
{
uint32_t i;
for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
;
return i;
}
static void guest_code(void *test_data)
{
int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
int i;
memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
if (f == SVM_F) {
struct svm_test_data *svm = test_data;
struct vmcb *vmcb = svm->vmcb;
generic_svm_setup(svm, NULL, NULL);
vmcb->save.idtr.limit = 0;
vmcb->save.rip = (u64)l2_guest_code;
vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
BIT_ULL(INTERCEPT_PAUSE) |
BIT_ULL(INTERCEPT_HLT);
vmcb->control.intercept_exceptions = 0;
} else {
GUEST_ASSERT(prepare_for_vmx_operation(test_data));
GUEST_ASSERT(load_vmcs(test_data));
prepare_vmcs(test_data, NULL, NULL);
GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
CPU_BASED_PAUSE_EXITING |
CPU_BASED_HLT_EXITING);
}
for (i = 0; i < ARRAY_SIZE(instructions); i++) {
struct emulated_instruction *insn = &instructions[i];
uint32_t insn_len = get_instruction_length(insn);
uint32_t exit_insn_len;
u32 exit_reason;
/*
* Copy the target instruction to the L2 code stream, and fill
* the remaining bytes with INT3s so that a missed intercept
* results in a consistent failure mode (SHUTDOWN).
*/
memcpy(l2_instruction, insn->opcode, insn_len);
memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
if (f == SVM_F) {
struct svm_test_data *svm = test_data;
struct vmcb *vmcb = svm->vmcb;
run_guest(vmcb, svm->vmcb_gpa);
exit_reason = vmcb->control.exit_code;
exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
} else {
GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
exit_reason = vmreadz(VM_EXIT_REASON);
exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
}
__GUEST_ASSERT(exit_reason == insn->exit_reason[f],
"Wanted exit_reason '0x%x' for '%s', got '0x%x'",
insn->exit_reason[f], insn->name, exit_reason);
__GUEST_ASSERT(exit_insn_len == insn_len,
"Wanted insn_len '%u' for '%s', got '%u'",
insn_len, insn->name, exit_insn_len);
}
GUEST_DONE();
}
int main(int argc, char *argv[])
{
vm_vaddr_t nested_test_data_gva;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
TEST_REQUIRE(is_forced_emulation_enabled);
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_test_data_gva);
else
vcpu_alloc_vmx(vm, &nested_test_data_gva);
vcpu_args_set(vcpu, 1, nested_test_data_gva);
vcpu_run(vcpu);
TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
kvm_vm_free(vm);
}