mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 10:33:41 +02:00
KVM: x86/mmu: Age TDP MMU SPTEs without holding mmu_lock
Walk the TDP MMU in an RCU read-side critical section without holding mmu_lock when harvesting and potentially updating age information on TDP MMU SPTEs. Add a new macro to do RCU-safe walking of TDP MMU roots, and do all SPTE aging with atomic updates; while clobbering Accessed information is ok, KVM must not corrupt other bits, e.g. must not drop a Dirty or Writable bit when making a SPTE young. If updating a SPTE to mark it for access tracking fails, leave it as is and treat it as if it were young. If the SPTE is being actively modified, it is most likely young. Acquire and release mmu_lock for write when harvesting age information from the shadow MMU, as the shadow MMU doesn't yet support aging outside of mmu_lock. Suggested-by: Yu Zhao <yuzhao@google.com> Signed-off-by: James Houghton <jthoughton@google.com> Reviewed-by: David Matlack <dmatlack@google.com> Link: https://lore.kernel.org/r/20250204004038.1680123-5-jthoughton@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
parent
928c54b1c4
commit
b146a9b34a
|
|
@ -1478,6 +1478,7 @@ struct kvm_arch {
|
|||
* tdp_mmu_page set.
|
||||
*
|
||||
* For reads, this list is protected by:
|
||||
* RCU alone or
|
||||
* the MMU lock in read mode + RCU or
|
||||
* the MMU lock in write mode
|
||||
*
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ config KVM_X86
|
|||
select KVM_COMMON
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select KVM_ELIDE_TLB_FLUSH_IF_YOUNG
|
||||
select KVM_MMU_LOCKLESS_AGING
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_PFNCACHE
|
||||
select HAVE_KVM_DIRTY_RING_TSO
|
||||
|
|
|
|||
|
|
@ -1592,8 +1592,11 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
|||
{
|
||||
bool young = false;
|
||||
|
||||
if (kvm_memslots_have_rmaps(kvm))
|
||||
if (kvm_memslots_have_rmaps(kvm)) {
|
||||
write_lock(&kvm->mmu_lock);
|
||||
young = kvm_rmap_age_gfn_range(kvm, range, false);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (tdp_mmu_enabled)
|
||||
young |= kvm_tdp_mmu_age_gfn_range(kvm, range);
|
||||
|
|
@ -1605,8 +1608,11 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
|||
{
|
||||
bool young = false;
|
||||
|
||||
if (kvm_memslots_have_rmaps(kvm))
|
||||
if (kvm_memslots_have_rmaps(kvm)) {
|
||||
write_lock(&kvm->mmu_lock);
|
||||
young = kvm_rmap_age_gfn_range(kvm, range, true);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
if (tdp_mmu_enabled)
|
||||
young |= kvm_tdp_mmu_test_age_gfn(kvm, range);
|
||||
|
|
|
|||
|
|
@ -193,6 +193,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
|
|||
!tdp_mmu_root_match((_root), (_types)))) { \
|
||||
} else
|
||||
|
||||
/*
|
||||
* Iterate over all TDP MMU roots in an RCU read-side critical section.
|
||||
* It is safe to iterate over the SPTEs under the root, but their values will
|
||||
* be unstable, so all writes must be atomic. As this routine is meant to be
|
||||
* used without holding the mmu_lock at all, any bits that are flipped must
|
||||
* be reflected in kvm_tdp_mmu_spte_need_atomic_write().
|
||||
*/
|
||||
#define for_each_tdp_mmu_root_rcu(_kvm, _root, _as_id, _types) \
|
||||
list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link) \
|
||||
if ((_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) || \
|
||||
!tdp_mmu_root_match((_root), (_types))) { \
|
||||
} else
|
||||
|
||||
#define for_each_valid_tdp_mmu_root(_kvm, _root, _as_id) \
|
||||
__for_each_tdp_mmu_root(_kvm, _root, _as_id, KVM_VALID_ROOTS)
|
||||
|
||||
|
|
@ -1332,21 +1345,22 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
|
|||
* from the clear_young() or clear_flush_young() notifier, which uses the
|
||||
* return value to determine if the page has been accessed.
|
||||
*/
|
||||
static void kvm_tdp_mmu_age_spte(struct tdp_iter *iter)
|
||||
static void kvm_tdp_mmu_age_spte(struct kvm *kvm, struct tdp_iter *iter)
|
||||
{
|
||||
u64 new_spte;
|
||||
|
||||
if (spte_ad_enabled(iter->old_spte)) {
|
||||
iter->old_spte = tdp_mmu_clear_spte_bits(iter->sptep,
|
||||
iter->old_spte,
|
||||
shadow_accessed_mask,
|
||||
iter->level);
|
||||
iter->old_spte = tdp_mmu_clear_spte_bits_atomic(iter->sptep,
|
||||
shadow_accessed_mask);
|
||||
new_spte = iter->old_spte & ~shadow_accessed_mask;
|
||||
} else {
|
||||
new_spte = mark_spte_for_access_track(iter->old_spte);
|
||||
iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep,
|
||||
iter->old_spte, new_spte,
|
||||
iter->level);
|
||||
/*
|
||||
* It is safe for the following cmpxchg to fail. Leave the
|
||||
* Accessed bit set, as the spte is most likely young anyway.
|
||||
*/
|
||||
if (__tdp_mmu_set_spte_atomic(kvm, iter, new_spte))
|
||||
return;
|
||||
}
|
||||
|
||||
trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level,
|
||||
|
|
@ -1371,9 +1385,9 @@ static bool __kvm_tdp_mmu_age_gfn_range(struct kvm *kvm,
|
|||
* valid roots!
|
||||
*/
|
||||
WARN_ON(types & ~KVM_VALID_ROOTS);
|
||||
__for_each_tdp_mmu_root(kvm, root, range->slot->as_id, types) {
|
||||
guard(rcu)();
|
||||
|
||||
guard(rcu)();
|
||||
for_each_tdp_mmu_root_rcu(kvm, root, range->slot->as_id, types) {
|
||||
tdp_root_for_each_leaf_pte(iter, kvm, root, range->start, range->end) {
|
||||
if (!is_accessed_spte(iter.old_spte))
|
||||
continue;
|
||||
|
|
@ -1382,7 +1396,7 @@ static bool __kvm_tdp_mmu_age_gfn_range(struct kvm *kvm,
|
|||
return true;
|
||||
|
||||
ret = true;
|
||||
kvm_tdp_mmu_age_spte(&iter);
|
||||
kvm_tdp_mmu_age_spte(kvm, &iter);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user