KVM: Allow lockless walk of SPTEs when handing aging mmu_notifier event

It is possible to correctly do aging without taking the KVM MMU lock,
or while taking it for read; add a Kconfig to let architectures do so.
Architectures that select KVM_MMU_LOCKLESS_AGING are responsible for
correctness.

Suggested-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20250204004038.1680123-3-jthoughton@google.com
[sean: massage shortlog+changelog, fix Kconfig goof and shorten name]
Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
James Houghton 2025-02-04 00:40:29 +00:00 committed by Sean Christopherson
parent 374ccd6360
commit aa34b81165
3 changed files with 20 additions and 6 deletions

View File

@ -267,6 +267,7 @@ struct kvm_gfn_range {
union kvm_mmu_notifier_arg arg;
enum kvm_gfn_range_filter attr_filter;
bool may_block;
bool lockless;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);

View File

@ -104,6 +104,10 @@ config KVM_ELIDE_TLB_FLUSH_IF_YOUNG
depends on KVM_GENERIC_MMU_NOTIFIER
bool
config KVM_MMU_LOCKLESS_AGING
depends on KVM_GENERIC_MMU_NOTIFIER
bool
config KVM_GENERIC_MEMORY_ATTRIBUTES
depends on KVM_GENERIC_MMU_NOTIFIER
bool

View File

@ -517,6 +517,7 @@ struct kvm_mmu_notifier_range {
on_lock_fn_t on_lock;
bool flush_on_ret;
bool may_block;
bool lockless;
};
/*
@ -571,6 +572,10 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
IS_KVM_NULL_FN(range->handler)))
return r;
/* on_lock will never be called for lockless walks */
if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock)))
return r;
idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
@ -607,15 +612,18 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
gfn_range.slot = slot;
gfn_range.lockless = range->lockless;
if (!r.found_memslot) {
r.found_memslot = true;
KVM_MMU_LOCK(kvm);
if (!IS_KVM_NULL_FN(range->on_lock))
range->on_lock(kvm);
if (!range->lockless) {
KVM_MMU_LOCK(kvm);
if (!IS_KVM_NULL_FN(range->on_lock))
range->on_lock(kvm);
if (IS_KVM_NULL_FN(range->handler))
goto mmu_unlock;
if (IS_KVM_NULL_FN(range->handler))
goto mmu_unlock;
}
}
r.ret |= range->handler(kvm, &gfn_range);
}
@ -625,7 +633,7 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm);
mmu_unlock:
if (r.found_memslot)
if (r.found_memslot && !range->lockless)
KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
@ -647,6 +655,7 @@ static __always_inline int kvm_age_hva_range(struct mmu_notifier *mn,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = flush_on_ret,
.may_block = false,
.lockless = IS_ENABLED(CONFIG_KVM_MMU_LOCKLESS_AGING),
};
return kvm_handle_hva_range(kvm, &range).ret;