From ea9fcdf76d3d1d659fb3daaa0cca7b1f4a0697bc Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 18 Mar 2025 09:31:11 +0800 Subject: [PATCH 1/5] KVM: x86/mmu: Further check old SPTE is leaf for spurious prefetch fault Instead of simply treating a prefetch fault as spurious when there's a shadow-present old SPTE, further check if the old SPTE is leaf to determine if a prefetch fault is spurious. It's not reasonable to treat a prefetch fault as spurious when there's a shadow-present non-leaf SPTE without a corresponding shadow-present leaf SPTE. e.g., in the following sequence, a prefetch fault should not be considered spurious: 1. add a memslot with size 4K 2. prefault GPA A in the memslot 3. delete the memslot (zap all disabled) 4. re-add the memslot with size 2M 5. prefault GPA A again. In step 5, the prefetch fault attempts to install a 2M huge entry. Since step 3 zaps the leaf SPTE for GPA A while keeping the non-leaf SPTE, the leaf entry will remain empty after step 5 if the fetch fault is regarded as spurious due to a shadow-present non-leaf SPTE. Signed-off-by: Yan Zhao Link: https://lore.kernel.org/r/20250318013111.5648-1-yan.y.zhao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/tdp_mmu.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a284dce227a0..b50d9e715806 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3020,7 +3020,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, } if (is_shadow_present_pte(*sptep)) { - if (prefetch) + if (prefetch && is_last_spte(*sptep, level)) return RET_PF_SPURIOUS; /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 405874f4d088..f53402257217 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1153,7 +1153,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, if (WARN_ON_ONCE(sp->role.level != fault->goal_level)) return RET_PF_RETRY; - if (fault->prefetch && is_shadow_present_pte(iter->old_spte)) + if (fault->prefetch && is_shadow_present_pte(iter->old_spte) && + is_last_spte(iter->old_spte, iter->level)) return RET_PF_SPURIOUS; if (is_shadow_present_pte(iter->old_spte) && From d17cc13cc484821b03e5c1cc5808ecb83d10027e Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 18 Mar 2025 09:32:10 +0800 Subject: [PATCH 2/5] KVM: x86/tdp_mmu: Merge prefetch and access checks for spurious faults Combine prefetch and is_access_allowed() checks into a unified path to detect spurious faults, since both cases now share identical logic. No functional changes. Signed-off-by: Yan Zhao Link: https://lore.kernel.org/r/20250318013210.5701-1-yan.y.zhao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/tdp_mmu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index f53402257217..d16e3e6e46a5 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1153,12 +1153,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, if (WARN_ON_ONCE(sp->role.level != fault->goal_level)) return RET_PF_RETRY; - if (fault->prefetch && is_shadow_present_pte(iter->old_spte) && - is_last_spte(iter->old_spte, iter->level)) - return RET_PF_SPURIOUS; - if (is_shadow_present_pte(iter->old_spte) && - is_access_allowed(fault, iter->old_spte) && + (fault->prefetch || is_access_allowed(fault, iter->old_spte)) && is_last_spte(iter->old_spte, iter->level)) return RET_PF_SPURIOUS; From 988da7820206405ef2d8afde83c2c2cd7af503a2 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 18 Mar 2025 09:32:38 +0800 Subject: [PATCH 3/5] KVM: x86/tdp_mmu: WARN if PFN changes for spurious faults Add a WARN() to assert that KVM does _not_ change the PFN of a shadow-present SPTE during spurious fault handling. KVM should _never_ change the PFN of a shadow-present SPTE and TDP MMU already BUG()s on this. However, spurious faults just return early before the existing BUG() could be hit. Suggested-by: Sean Christopherson Signed-off-by: Yan Zhao Link: https://lore.kernel.org/r/20250318013238.5732-1-yan.y.zhao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/tdp_mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d16e3e6e46a5..1e2db4917459 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1155,8 +1155,10 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, if (is_shadow_present_pte(iter->old_spte) && (fault->prefetch || is_access_allowed(fault, iter->old_spte)) && - is_last_spte(iter->old_spte, iter->level)) + is_last_spte(iter->old_spte, iter->level)) { + WARN_ON_ONCE(fault->pfn != spte_to_pfn(iter->old_spte)); return RET_PF_SPURIOUS; + } if (unlikely(!fault->slot)) new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); From 11d45175111d933c5175acc28e56af2213dd5cd6 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 18 Mar 2025 09:33:10 +0800 Subject: [PATCH 4/5] KVM: x86/mmu: Warn if PFN changes on shadow-present SPTE in shadow MMU Warn if PFN changes on shadow-present SPTE in mmu_set_spte(). KVM should _never_ change the PFN of a shadow-present SPTE. In mmu_set_spte(), there is a WARN_ON_ONCE() on pfn changes on shadow-present SPTE in mmu_spte_update() to detect this condition. However, that WARN_ON_ONCE() is not hittable since mmu_set_spte() invokes drop_spte() earlier before mmu_spte_update(), which clears SPTE to a !shadow-present state. So, before invoking drop_spte(), add a WARN_ON_ONCE() in mmu_set_spte() to warn PFN change of a shadow-present SPTE. For the spurious prefetch fault, only return RET_PF_SPURIOUS directly when PFN is not changed. When PFN changes, fall through to follow the sequence of drop_spte(), warn of PFN change, make_spte(), flush tlb, rmap_add(). Signed-off-by: Yan Zhao Link: https://lore.kernel.org/r/20250318013310.5781-1-yan.y.zhao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b50d9e715806..221060b578f9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3020,7 +3020,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, } if (is_shadow_present_pte(*sptep)) { - if (prefetch && is_last_spte(*sptep, level)) + if (prefetch && is_last_spte(*sptep, level) && + pfn == spte_to_pfn(*sptep)) return RET_PF_SPURIOUS; /* @@ -3034,7 +3035,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, child = spte_to_child_sp(pte); drop_parent_pte(vcpu->kvm, child, sptep); flush = true; - } else if (pfn != spte_to_pfn(*sptep)) { + } else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) { drop_spte(vcpu->kvm, sptep); flush = true; } else From 6a3d704959bd04ab37fc588aff70b3078f3c90e8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 31 Mar 2025 11:27:03 -0700 Subject: [PATCH 5/5] KVM: x86/mmu: Use kvm_x86_call() instead of manual static_call() Use KVM's preferred kvm_x86_call() wrapper to invoke static calls related to mirror page tables. No functional change intended. Fixes: 77ac7079e66d ("KVM: x86/tdp_mmu: Propagate building mirror page tables") Fixes: 94faba8999b9 ("KVM: x86/tdp_mmu: Propagate tearing down mirror page tables") Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20250331182703.725214-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/tdp_mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 1e2db4917459..7f3d7229b2c1 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -378,7 +378,7 @@ static void remove_external_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte, /* Zapping leaf spte is allowed only when write lock is held. */ lockdep_assert_held_write(&kvm->mmu_lock); /* Because write lock is held, operation should success. */ - ret = static_call(kvm_x86_remove_external_spte)(kvm, gfn, level, old_pfn); + ret = kvm_x86_call(remove_external_spte)(kvm, gfn, level, old_pfn); KVM_BUG_ON(ret, kvm); } @@ -485,8 +485,8 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) } if (is_mirror_sp(sp) && - WARN_ON(static_call(kvm_x86_free_external_spt)(kvm, base_gfn, sp->role.level, - sp->external_spt))) { + WARN_ON(kvm_x86_call(free_external_spt)(kvm, base_gfn, sp->role.level, + sp->external_spt))) { /* * Failed to free page table page in mirror page table and * there is nothing to do further. @@ -538,12 +538,12 @@ static int __must_check set_external_spte_present(struct kvm *kvm, tdp_ptep_t sp * external page table, or leaf. */ if (is_leaf) { - ret = static_call(kvm_x86_set_external_spte)(kvm, gfn, level, new_pfn); + ret = kvm_x86_call(set_external_spte)(kvm, gfn, level, new_pfn); } else { void *external_spt = get_external_spt(gfn, new_spte, level); KVM_BUG_ON(!external_spt, kvm); - ret = static_call(kvm_x86_link_external_spt)(kvm, gfn, level, external_spt); + ret = kvm_x86_call(link_external_spt)(kvm, gfn, level, external_spt); } if (ret) __kvm_tdp_mmu_write_spte(sptep, old_spte);