diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ff07c45e3c73..68db00dc09a0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1433,6 +1433,7 @@ struct kvm_arch { struct kvm_pit *vpit; #endif atomic_t vapics_in_nmi_mode; + struct mutex apic_map_lock; struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; @@ -1440,9 +1441,13 @@ struct kvm_arch { bool apic_access_memslot_enabled; bool apic_access_memslot_inhibited; - /* Protects apicv_inhibit_reasons */ + /* + * Protects apicv_inhibit_reasons and apicv_nr_irq_window_req (with an + * asterisk, see kvm_inc_or_dec_irq_window_inhibit() for details). + */ struct rw_semaphore apicv_update_lock; unsigned long apicv_inhibit_reasons; + atomic_t apicv_nr_irq_window_req; gpa_t wall_clock; @@ -2316,6 +2321,18 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } +void kvm_inc_or_dec_irq_window_inhibit(struct kvm *kvm, bool inc); + +static inline void kvm_inc_apicv_irq_window_req(struct kvm *kvm) +{ + kvm_inc_or_dec_irq_window_inhibit(kvm, true); +} + +static inline void kvm_dec_apicv_irq_window_req(struct kvm *kvm) +{ + kvm_inc_or_dec_irq_window_inhibit(kvm, false); +} + int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8766fd5f6d2b..e0da247ee594 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3729,8 +3729,11 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) * the case in which the interrupt window was requested while L1 was * active (the vCPU was not running nested). */ - if (!kvm_cpu_has_injectable_intr(vcpu) || is_guest_mode(vcpu)) - kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN); + if (svm->avic_irq_window && + (!kvm_cpu_has_injectable_intr(vcpu) || is_guest_mode(vcpu))) { + svm->avic_irq_window = false; + kvm_dec_apicv_irq_window_req(svm->vcpu.kvm); + } trace_kvm_inj_virq(intr->nr, intr->soft, reinjected); ++vcpu->stat.irq_injections; @@ -3932,17 +3935,28 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu) */ if (vgif || gif_set(svm)) { /* - * IRQ window is not needed when AVIC is enabled, - * unless we have pending ExtINT since it cannot be injected - * via AVIC. In such case, KVM needs to temporarily disable AVIC, - * and fallback to injecting IRQ via V_IRQ. + * KVM only enables IRQ windows when AVIC is enabled if there's + * pending ExtINT since it cannot be injected via AVIC (ExtINT + * bypasses the local APIC). V_IRQ is ignored by hardware when + * AVIC is enabled, and so KVM needs to temporarily disable + * AVIC in order to detect when it's ok to inject the ExtINT. * - * If running nested, AVIC is already locally inhibited - * on this vCPU, therefore there is no need to request - * the VM wide AVIC inhibition. + * If running nested, AVIC is already locally inhibited on this + * vCPU (L2 vCPUs use a different MMU that never maps the AVIC + * backing page), therefore there is no need to increment the + * VM-wide AVIC inhibit. KVM will re-evaluate events when the + * vCPU exits to L1 and enable an IRQ window if the ExtINT is + * still pending. + * + * Note, the IRQ window inhibit needs to be updated even if + * AVIC is inhibited for a different reason, as KVM needs to + * keep AVIC inhibited if the other reason is cleared and there + * is still an injectable interrupt pending. */ - if (!is_guest_mode(vcpu)) - kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN); + if (enable_apicv && !svm->avic_irq_window && !is_guest_mode(vcpu)) { + svm->avic_irq_window = true; + kvm_inc_apicv_irq_window_req(vcpu->kvm); + } svm_set_vintr(svm); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index ebd7b36b1ceb..68675b25ef8e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -333,6 +333,7 @@ struct vcpu_svm { bool guest_state_loaded; + bool avic_irq_window; bool x2avic_msrs_intercepted; bool lbr_msrs_intercepted; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a03530795707..db25938b6b50 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11014,6 +11014,25 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit); +void kvm_inc_or_dec_irq_window_inhibit(struct kvm *kvm, bool inc) +{ + int add = inc ? 1 : -1; + + if (!enable_apicv) + return; + + /* + * Strictly speaking, the lock is only needed if going 0->1 or 1->0, + * a la atomic_dec_and_mutex_lock. However, ExtINTs are rare and + * only target a single CPU, so that is the common case; do not + * bother eliding the down_write()/up_write() pair. + */ + guard(rwsem_write)(&kvm->arch.apicv_update_lock); + if (atomic_add_return(add, &kvm->arch.apicv_nr_irq_window_req) == inc) + __kvm_set_or_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_IRQWIN, inc); +} +EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inc_or_dec_irq_window_inhibit); + static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { if (!kvm_apic_present(vcpu))