diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1cc083f95e6a..76d959d15e14 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -845,24 +845,15 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err; /* - * NextRIP is consumed on VMRUN as the return address pushed on the - * stack for injected soft exceptions/interrupts. If nrips is exposed - * to L1, take it verbatim from vmcb12. - * - * If nrips is supported in hardware but not exposed to L1, stuff the - * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is - * responsible for advancing RIP prior to injecting the event). This is - * only the case for the first L2 run after VMRUN. After that (e.g. - * during save/restore), NextRIP is updated by the CPU and/or KVM, and - * the value of the L2 RIP from vmcb12 should not be used. + * If nrips is exposed to L1, take NextRIP as-is. Otherwise, L1 + * advances L2's RIP before VMRUN instead of using NextRIP. KVM will + * stuff the current RIP as vmcb02's NextRIP before L2 is run. After + * the first run of L2 (e.g. after save+restore), NextRIP is updated by + * the CPU and/or KVM and should be used regardless of L1's support. */ - if (boot_cpu_has(X86_FEATURE_NRIPS)) { - if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || - !svm->nested.nested_run_pending) - vmcb02->control.next_rip = svm->nested.ctl.next_rip; - else - vmcb02->control.next_rip = vmcb12_rip; - } + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || + !svm->nested.nested_run_pending) + vmcb02->control.next_rip = svm->nested.ctl.next_rip; svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); if (is_evtinj_soft(vmcb02->control.event_inj)) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 07f096758f34..f862bafc381a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3738,6 +3738,29 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected) svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type; } +static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (!is_guest_mode(vcpu) || !svm->nested.nested_run_pending) + return; + + /* + * If nrips is supported in hardware but not exposed to L1, stuff the + * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is + * responsible for advancing RIP prior to injecting the event). Once L2 + * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or + * KVM, and this is no longer needed. + * + * This is done here (as opposed to when preparing vmcb02) to use the + * most up-to-date value of RIP regardless of the order of restoring + * registers and nested state in the vCPU save+restore path. + */ + if (boot_cpu_has(X86_FEATURE_NRIPS) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + svm->vmcb->control.next_rip = kvm_rip_read(vcpu); +} + void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, int trig_mode, int vector) { @@ -4334,6 +4357,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS)) svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP; + svm_fixup_nested_rips(vcpu); + svm_hv_update_vp_id(svm->vmcb, vcpu); /*