KVM: arm64: nv: Handle mapping of VNCR_EL2 at EL2

Now that we can handle faults triggered through VNCR_EL2, we need
to map the corresponding page at EL2. But where, you'll ask?

Since each CPU in the system can run a vcpu, we need a per-CPU
mapping. For that, we carve a NR_CPUS range in the fixmap, giving
us a per-CPU va at which to map the guest's VNCR's page.

The mapping occurs both on vcpu load and on the back of a fault,
both generating a request that will take care of the mapping.
That mapping will also get dropped on vcpu put.

Yes, this is a bit heavy handed, but it is simple. Eventually,
we may want to have a per-VM, per-CPU mapping, which would avoid
all the TLBI overhead.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-11-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2025-05-14 11:34:53 +01:00
parent 069a05e535
commit 2a359e0725
4 changed files with 103 additions and 9 deletions

View File

@ -48,6 +48,12 @@ enum fixed_addresses {
FIX_EARLYCON_MEM_BASE,
FIX_TEXT_POKE0,
#ifdef CONFIG_KVM
/* One slot per CPU, mapping the guest's VNCR page at EL2. */
FIX_VNCR_END,
FIX_VNCR = FIX_VNCR_END + NR_CPUS,
#endif
#ifdef CONFIG_ACPI_APEI_GHES
/* Used for GHES mapping from assorted contexts */
FIX_APEI_GHES_IRQ,

View File

@ -658,6 +658,7 @@ struct kvm_host_data {
#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4
#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5
#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6
#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7
unsigned long flags;
struct kvm_cpu_context host_ctxt;

View File

@ -337,4 +337,11 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
#define vncr_fixmap(c) \
({ \
u32 __c = (c); \
BUG_ON(__c >= NR_CPUS); \
(FIX_VNCR - __c); \
})
#endif /* __ARM64_KVM_NESTED_H */

View File

@ -8,6 +8,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/fixmap.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
@ -704,23 +705,35 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
{
/*
* The vCPU kept its reference on the MMU after the last put, keep
* rolling with it.
* If the vCPU kept its reference on the MMU after the last put,
* keep rolling with it.
*/
if (vcpu->arch.hw_mmu)
return;
if (is_hyp_ctxt(vcpu)) {
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
if (!vcpu->arch.hw_mmu)
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
} else {
write_lock(&vcpu->kvm->mmu_lock);
vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
write_unlock(&vcpu->kvm->mmu_lock);
if (!vcpu->arch.hw_mmu) {
scoped_guard(write_lock, &vcpu->kvm->mmu_lock)
vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
}
if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
}
}
void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
{
/* Unconditionally drop the VNCR mapping if we have one */
if (host_data_test_flag(L1_VNCR_MAPPED)) {
BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
BUG_ON(is_hyp_ctxt(vcpu));
clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
vcpu->arch.vncr_tlb->cpu = -1;
host_data_clear_flag(L1_VNCR_MAPPED);
}
/*
* Keep a reference on the associated stage-2 MMU if the vCPU is
* scheduling out and not in WFI emulation, suggesting it is likely to
@ -1042,6 +1055,70 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
return 1;
}
static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
{
struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
pgprot_t prot;
guard(preempt)();
guard(read_lock)(&vcpu->kvm->mmu_lock);
/*
* The request to map VNCR may have raced against some other
* event, such as an interrupt, and may not be valid anymore.
*/
if (is_hyp_ctxt(vcpu))
return;
/*
* Check that the pseudo-TLB is valid and that VNCR_EL2 still
* contains the expected value. If it doesn't, we simply bail out
* without a mapping -- a transformed MSR/MRS will generate the
* fault and allows us to populate the pseudo-TLB.
*/
if (!vt->valid)
return;
if (read_vncr_el2(vcpu) != vt->gva)
return;
if (vt->wr.nG) {
u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
u64 ttbr = ((tcr & TCR_A1) ?
vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
vcpu_read_sys_reg(vcpu, TTBR0_EL2));
u16 asid;
asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
!(tcr & TCR_ASID16))
asid &= GENMASK(7, 0);
if (asid != vt->wr.asid)
return;
}
vt->cpu = smp_processor_id();
if (vt->wr.pw && vt->wr.pr)
prot = PAGE_KERNEL;
else if (vt->wr.pr)
prot = PAGE_KERNEL_RO;
else
prot = PAGE_NONE;
/*
* We can't map write-only (or no permission at all) in the kernel,
* but the guest can do it if using POE, so we'll have to turn a
* translation fault into a permission fault at runtime.
* FIXME: WO doesn't work at all, need POE support in the kernel.
*/
if (pgprot_val(prot) != pgprot_val(PAGE_NONE)) {
__set_fixmap(vncr_fixmap(vt->cpu), vt->hpa, prot);
host_data_set_flag(L1_VNCR_MAPPED);
}
}
/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
@ -1582,6 +1659,9 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
write_unlock(&vcpu->kvm->mmu_lock);
}
if (kvm_check_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu))
kvm_map_l1_vncr(vcpu);
/* Must be last, as may switch context! */
if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
kvm_inject_nested_irq(vcpu);