KVM: arm64: Handle aborts from protected VMs

Introduce a new abort handler for resolving stage-2 page faults from
protected VMs by pinning and donating anonymous memory. This is
considerably simpler than the infamous user_mem_abort() as we only have
to deal with translation faults at the pte level.

Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Tested-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Link: https://patch.msgid.link/20260330144841.26181-15-will@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Will Deacon 2026-03-30 15:48:15 +01:00 committed by Marc Zyngier
parent 5fef16ef49
commit ea03466e80

View File

@ -1642,6 +1642,74 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return ret != -EAGAIN ? ret : 0;
}
/*
 * Resolve a stage-2 abort taken by a protected VM.
 *
 * One page is charged to the locked-VM accounting of current->mm, the page
 * backing @hva is pinned with a long-term writable GUP pin, and it is then
 * mapped RWX at @fault_ipa via pkvm_pgtable_stage2_map() while holding
 * kvm->mmu_lock for writing.
 *
 * When the mapping succeeds, both the pin and the accounting charge are left
 * in place. Every other exit taken after the charge releases whatever had
 * been acquired up to that point.
 *
 * @memslot is not referenced by this function.
 *
 * Return: 0 when the page was mapped, when the pin reported -EHWPOISON (a
 * hwpoison signal is sent first) or when the map returned -EAGAIN; -ENOMEM
 * if prepare_mmu_memcache() fails; the account_locked_vm() error if the
 * charge fails; -EFAULT if exactly one page could not be pinned; -EIO if the
 * pinned folio is not swap-backed; otherwise the error returned by
 * pkvm_pgtable_stage2_map().
 */
static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva)
{
	const unsigned int gup_flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
	struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
	struct mm_struct *mm = current->mm;
	struct kvm *kvm = vcpu->kvm;
	struct page *page;
	void *hyp_memcache;
	int pinned;
	int ret;

	/* Any memcache preparation failure is reported as -ENOMEM. */
	if (prepare_mmu_memcache(vcpu, true, &hyp_memcache))
		return -ENOMEM;

	/* Charge the page before pinning it; undone on every error path. */
	ret = account_locked_vm(mm, 1, true);
	if (ret)
		return ret;

	mmap_read_lock(mm);
	pinned = pin_user_pages(hva, 1, gup_flags, &page);
	mmap_read_unlock(mm);

	if (pinned == -EHWPOISON) {
		/* Nothing was pinned: signal, then only drop the charge. */
		kvm_send_hwpoison_signal(hva, PAGE_SHIFT);
		ret = 0;
		goto out_unaccount;
	}

	if (pinned != 1) {
		ret = -EFAULT;
		goto out_unaccount;
	}

	if (!folio_test_swapbacked(page_folio(page))) {
		/*
		 * Page-cache pages handed back by GUP cannot be supported:
		 * (a) writeback could be triggered for a page we can no
		 * longer access, and (b) page_mkclean() won't find the
		 * stage-2 mapping in the rmap, so we could fall out of sync
		 * with the filesystem when the page is marked dirty during
		 * unpinning (see cc5095747edf ("ext4: don't BUG if someone
		 * dirty pages without asking ext4 first")).
		 *
		 * Restricting ourselves to anonymous pages would be ideal,
		 * but memfd (i.e. shmem) pages must be allowed as well.
		 * Checking for swap-backed pages covers both, relying on the
		 * GUP pin to prevent try_to_unmap() from succeeding.
		 */
		ret = -EIO;
		goto out_unpin;
	}

	write_lock(&kvm->mmu_lock);
	ret = pkvm_pgtable_stage2_map(pgt, fault_ipa, PAGE_SIZE,
				      page_to_phys(page), KVM_PGTABLE_PROT_RWX,
				      hyp_memcache, 0);
	write_unlock(&kvm->mmu_lock);
	if (!ret)
		return 0;

	/* -EAGAIN is not reported as an error, but the page is still released. */
	if (ret == -EAGAIN)
		ret = 0;

out_unpin:
	unpin_user_pages(&page, 1);
out_unaccount:
	account_locked_vm(mm, 1, false);
	return ret;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
@ -2205,15 +2273,20 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
goto out_unlock;
}
VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
!write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));
if (kvm_vm_is_protected(vcpu->kvm)) {
ret = pkvm_mem_abort(vcpu, fault_ipa, memslot, hva);
} else {
VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
!write_fault &&
!kvm_vcpu_trap_is_exec_fault(vcpu));
if (kvm_slot_has_gmem(memslot))
ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
esr_fsc_is_permission_fault(esr));
else
ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
esr_fsc_is_permission_fault(esr));
if (kvm_slot_has_gmem(memslot))
ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
esr_fsc_is_permission_fault(esr));
else
ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
esr_fsc_is_permission_fault(esr));
}
if (ret == 0)
ret = 1;
out: