From cf6348af645bd8e38758114e6afcc406c5bb515f Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Mon, 30 Mar 2026 10:54:41 +0000 Subject: [PATCH 1/4] KVM: arm64: Prevent the host from using an smc with imm16 != 0 The ARM Service Calling Convention (SMCCC) specifies that the function identifier and parameters should be passed in registers, leaving the 16-bit immediate field un-handled in pKVM when an SMC instruction is trapped. Since the HVC is a private interface between EL2 and the host, enforce the host kernel running under pKVM to use an immediate value of 0 only when using SMCs to make it clear for non-compliant software talking to Trustzone that we only use SMCCC. Signed-off-by: Sebastian Ene Reviewed-by: Vincent Donnefort Link: https://patch.msgid.link/20260330105441.3226904-1-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e7790097db93..461cf5cb5ac7 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -676,8 +676,14 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(u64, func_id, host_ctxt, 0); + u64 esr = read_sysreg_el2(SYS_ESR); bool handled; + if (esr & ESR_ELx_xVC_IMM_MASK) { + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; + goto exit_skip_instr; + } + func_id &= ~ARM_SMCCC_CALL_HINTS; handled = kvm_host_psci_handler(host_ctxt, func_id); @@ -686,6 +692,7 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) if (!handled) default_host_smc_handler(host_ctxt); +exit_skip_instr: /* SMC was trapped, move ELR past the current PC. 
*/ kvm_skip_host_instr(); } From 2fc0f3e2b9a9f397554ffe86e8f6eb0e2507ec6e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 27 Mar 2026 19:27:56 +0000 Subject: [PATCH 2/4] KVM: arm64: Don't leave mmu->pgt dangling on kvm_init_stage2_mmu() error If kvm_init_stage2_mmu() fails to allocate 'mmu->last_vcpu_ran', it destroys the newly allocated stage-2 page-table before returning ENOMEM. Unfortunately, it also leaves a dangling pointer in 'mmu->pgt' which points at the freed 'kvm_pgtable' structure. This is likely to confuse the kvm_vcpu_init_nested() failure path which can double-free the structure if it finds it via kvm_free_stage2_pgd(). Ensure that the dangling 'mmu->pgt' pointer is cleared when returning an error from kvm_init_stage2_mmu(). Link: https://sashiko.dev/#/patchset/20260327140039.21228-1-will%40kernel.org?patch=12265 Signed-off-by: Will Deacon Link: https://patch.msgid.link/20260327192758.21739-2-will@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 17d64a1e11e5..34e9d897d08b 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1013,6 +1013,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t out_destroy_pgtable: kvm_stage2_destroy(pgt); + mmu->pgt = NULL; out_free_pgtable: kfree(pgt); return err; From a3ca3bfd01b7ee9f54ed85718a6d553cdd87050e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 27 Mar 2026 19:27:57 +0000 Subject: [PATCH 3/4] KVM: arm64: Destroy stage-2 page-table in kvm_arch_destroy_vm() kvm_arch_destroy_vm() can be called on the kvm_create_vm() error path after we have failed to register the MMU notifiers for the new VM. In this case, we cannot rely on the MMU ->release() notifier to call kvm_arch_flush_shadow_all() and so the stage-2 page-table allocated in kvm_arch_init_vm() will be leaked. 
Explicitly destroy the stage-2 page-table in kvm_arch_destroy_vm(), so that we clean up after kvm_arch_init_vm() without relying on the MMU notifiers. Link: https://sashiko.dev/#/patchset/20260327140039.21228-1-will%40kernel.org?patch=12265 Signed-off-by: Will Deacon Link: https://patch.msgid.link/20260327192758.21739-3-will@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 410ffd41fd73..29bfa79555b2 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -301,6 +301,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (is_protected_kvm_enabled()) pkvm_destroy_hyp_vm(kvm); + kvm_uninit_stage2_mmu(kvm); kvm_destroy_mpidr_data(kvm); kfree(kvm->arch.sysreg_masks); From 03db5f05d4c76d76b32a9d26001e2ec6252f74f8 Mon Sep 17 00:00:00 2001 From: "Zenghui Yu (Huawei)" Date: Tue, 17 Mar 2026 21:15:58 +0800 Subject: [PATCH 4/4] KVM: arm64: selftests: Avoid testing the IMPDEF behavior It turned out that we can't really force KVM to use the "slow" path when emulating AT instructions [1]. We should therefore avoid testing the IMPDEF behavior (i.e., TEST_ACCESS_FLAG - address translation instructions are permitted to update AF but not required). Remove it and improve the comment a bit. 
[1] https://lore.kernel.org/r/b951dcfb-0ad1-4d7b-b6ce-d54b272dd9be@linux.dev Signed-off-by: Zenghui Yu (Huawei) Link: https://patch.msgid.link/20260317131558.52751-1-zenghui.yu@linux.dev Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/at.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c index c8ee6f520734..ce5d312ef6ba 100644 --- a/tools/testing/selftests/kvm/arm64/at.c +++ b/tools/testing/selftests/kvm/arm64/at.c @@ -13,7 +13,6 @@ enum { CLEAR_ACCESS_FLAG, - TEST_ACCESS_FLAG, }; static u64 *ptep_hva; @@ -49,7 +48,6 @@ do { \ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \ GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \ - GUEST_SYNC(TEST_ACCESS_FLAG); \ } \ } while (0) @@ -85,10 +83,6 @@ static void guest_code(void) if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1))) GUEST_DONE(); - /* - * KVM's software PTW makes the implementation choice that the AT - * instruction sets the access flag. - */ sysreg_clear_set(tcr_el1, 0, TCR_HA); isb(); test_at(false); @@ -102,8 +96,8 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) case CLEAR_ACCESS_FLAG: /* * Delete + reinstall the memslot to invalidate stage-2 - * mappings of the stage-1 page tables, forcing KVM to - * use the 'slow' AT emulation path. + * mappings of the stage-1 page tables, allowing KVM to + * potentially use the 'slow' AT emulation path. 
* * This and clearing the access flag from host userspace * ensures that the access flag cannot be set speculatively @@ -112,10 +106,6 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) clear_bit(__ffs(PTE_AF), ptep_hva); vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]); break; - case TEST_ACCESS_FLAG: - TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva), - "Expected access flag to be set (desc: %lu)", *ptep_hva); - break; default: TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]); }