From 5e8b511c39f35dcbf2e548a75d49782778c2df48 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 6 Jan 2026 16:52:10 +0000 Subject: [PATCH 01/10] KVM: arm64: gic: Check for vGICv3 when clearing TWI Explicitly check for the vgic being v3 when disabling TWI. Failure to check this can result in using the wrong view of the vgic CPU IF union causing undesirable/unexpected behaviour. Signed-off-by: Sascha Bischoff Reviewed-by: Marc Zyngier Link: https://msgid.link/20260106165154.3321753-1-sascha.bischoff@arm.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4f80da0c0d1d..620a465248d1 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -569,6 +569,7 @@ static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu) return kvm_wfi_trap_policy == KVM_WFX_NOTRAP; return single_task_running() && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 && (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) || vcpu->kvm->arch.vgic.nassgireq); } From 8d8e882c2b4b73a3f894c3cad74718b633e3d166 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 10 Dec 2025 17:30:19 +0000 Subject: [PATCH 02/10] KVM: arm64: Fix EL2 S1 XN handling for hVHE setups The current XN implementation is tied to the EL2 translation regime, and falls flat on its face with the EL2&0 one that is used for hVHE, as the permission bit for privileged execution is a different one. 
Fixes: 6537565fd9b7f ("KVM: arm64: Adjust EL2 stage-1 leaf AP bits when ARM64_KVM_HVHE is set") Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Link: https://msgid.link/20251210173024.561160-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_pgtable.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index fc02de43c68d..be68b8969206 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -87,7 +87,15 @@ typedef u64 kvm_pte_t; #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) -#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) +#define __KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) +#define __KVM_PTE_LEAF_ATTR_HI_S1_UXN BIT(54) +#define __KVM_PTE_LEAF_ATTR_HI_S1_PXN BIT(53) + +#define KVM_PTE_LEAF_ATTR_HI_S1_XN \ + ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? \ + (__KVM_PTE_LEAF_ATTR_HI_S1_UXN | \ + __KVM_PTE_LEAF_ATTR_HI_S1_PXN) : \ + __KVM_PTE_LEAF_ATTR_HI_S1_XN; }) #define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53) From 145cc42fe1217c66174c44c4034cc0fe3040bbb0 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 16 Dec 2025 10:30:50 +0000 Subject: [PATCH 03/10] KVM: arm64: Copy FGT traps to unprotected pKVM VCPU on VCPU load Commit fb10ddf35c1c ("KVM: arm64: Compute per-vCPU FGTs at vcpu_load()") introduced per-VCPU FGT traps. For an unprotected pKVM VCPU, the untrusted host FGT configuration is copied in pkvm_vcpu_init_traps(), which is called from __pkvm_init_vcpu(). __pkvm_init_vcpu() is called once per VCPU (when the VCPU is first run) which means that the uninitialized, zero, values for the FGT registers end up being used for the entire lifetime of the VCPU. This causes both unwanted traps (for the inverse polarity trap bits) and the guest being allowed to access registers it shouldn't. Fix it by copying the FGT traps for unprotected pKVM VCPUs when the untrusted host loads the VCPU. 
Fixes: fb10ddf35c1c ("KVM: arm64: Compute per-vCPU FGTs at vcpu_load()") Acked-by: Will Deacon Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Link: https://msgid.link/20251216103053.47224-2-alexandru.elisei@arm.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 3 +++ arch/arm64/kvm/hyp/nvhe/pkvm.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a7c689152f68..8ffbbce5e2ed 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -180,6 +180,9 @@ static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt) /* Propagate WFx trapping flags */ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI); hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI); + } else { + memcpy(&hyp_vcpu->vcpu.arch.fgt, hyp_vcpu->host_vcpu->arch.fgt, + sizeof(hyp_vcpu->vcpu.arch.fgt)); } } diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 8911338961c5..12b2acfbcfd1 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -172,7 +172,6 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) /* Trust the host for non-protected vcpu features. */ vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2; - memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt)); return 0; } From aba963cb98c6d9d42490262a39c3d383cfebc6a9 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 16 Dec 2025 10:30:51 +0000 Subject: [PATCH 04/10] KVM: arm64: Inject UNDEF for a register trap without accessor Configuring a register trap without specifying an accessor function is obviously a bug. Instead of calling die() when that happens, let's be a bit more helpful and print the register encoding. Also inject an undefined instruction exception in the guest, similar to other unhandled register accesses. 
Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Link: https://msgid.link/20251216103053.47224-3-alexandru.elisei@arm.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c8fd7c6a12a1..88a57ca36d96 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -4668,7 +4668,10 @@ static void perform_access(struct kvm_vcpu *vcpu, * that we don't know how to handle. This certainly qualifies * as a gross bug that should be fixed right away. */ - BUG_ON(!r->access); + if (!r->access) { + bad_trap(vcpu, params, r, "register access"); + return; + } /* Skip instruction if instructed so */ if (likely(r->access(vcpu, params, r))) From 26cdea4893c2b26cad58926d8f29792386219332 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 16 Dec 2025 10:30:52 +0000 Subject: [PATCH 05/10] KVM: arm64: Remove extra argument for __pkvm_host_{share,unshare}_hyp() __pkvm_host_share_hyp() and __pkvm_host_unshare_hyp() both have one parameter, the pfn, not two. Even though correctness isn't impacted because the SMCCC handlers pass the first argument and ignore the second one, let's call the functions with the proper number of arguments. 
Reviewed-by: Fuad Tabba Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Tested-by: Fuad Tabba Link: https://msgid.link/20251216103053.47224-4-alexandru.elisei@arm.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 48d7c372a4cd..124404eb208d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -497,7 +497,7 @@ static int share_pfn_hyp(u64 pfn) this->count = 1; rb_link_node(&this->node, parent, node); rb_insert_color(&this->node, &hyp_shared_pfns); - ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1); + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn); unlock: mutex_unlock(&hyp_shared_pfns_lock); @@ -523,7 +523,7 @@ static int unshare_pfn_hyp(u64 pfn) rb_erase(&this->node, &hyp_shared_pfns); kfree(this); - ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1); + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn); unlock: mutex_unlock(&hyp_shared_pfns_lock); From d252c7898ebccef52665514be07b90987de7ffc7 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 16 Dec 2025 10:30:53 +0000 Subject: [PATCH 06/10] KVM: arm64: Remove unused parameter in synchronize_vcpu_pstate() synchronize_vcpu_pstate() doesn't make use of the reference to exit_code, remove the parameter. 
Reviewed-by: Fuad Tabba Signed-off-by: Alexandru Elisei Reviewed-by: Marc Zyngier Tested-by: Fuad Tabba Link: https://msgid.link/20251216103053.47224-5-alexandru.elisei@arm.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index c5d5e5b86eaf..afecbdd3c1e9 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -854,7 +854,7 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code, return false; } -static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu) { /* * Check for the conditions of Cortex-A510's #2077057. When these occur diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index d3b9ec8a7c28..779089e42681 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -211,7 +211,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); - synchronize_vcpu_pstate(vcpu, exit_code); + synchronize_vcpu_pstate(vcpu); /* * Some guests (e.g., protected VMs) are not be allowed to run in diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 9984c492305a..9db3f11a4754 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -536,7 +536,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - synchronize_vcpu_pstate(vcpu, exit_code); + synchronize_vcpu_pstate(vcpu); /* * If we were in HYP context on entry, adjust the PSTATE view From 9cb2c20f06c300f92a831e4c374e353b33c5582b Mon Sep 
17 00:00:00 2001 From: Dongxu Sun Date: Fri, 9 Jan 2026 16:02:26 +0800 Subject: [PATCH 07/10] KVM: arm64: Remove unused vcpu_{clear,set}_wfx_traps() Functions vcpu_{clear,set}_wfx_traps() are unused since commit 0b5afe05377d7 ("KVM: arm64: Add early_param to control WFx trapping"). Remove them. Reviewed-by: Zenghui Yu Signed-off-by: Dongxu Sun Link: https://msgid.link/20260109080226.761107-1-sundongxu1024@163.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index c9eab316398e..55d34192a8de 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -119,22 +119,6 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr_el2; } -static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 &= ~HCR_TWE; - if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) || - vcpu->kvm->arch.vgic.nassgireq) - vcpu->arch.hcr_el2 &= ~HCR_TWI; - else - vcpu->arch.hcr_el2 |= HCR_TWI; -} - -static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) -{ - vcpu->arch.hcr_el2 |= HCR_TWE; - vcpu->arch.hcr_el2 |= HCR_TWI; -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; From 9e27085c33cca7ad26bec0af2c17aab072dd802e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 8 Jan 2026 12:42:30 -0800 Subject: [PATCH 08/10] KVM: arm64: nv: Respect stage-2 write permission when setting stage-1 AF Naturally, updating the Access Flag in a stage-1 descriptor requires write permission at stage-2, although this isn't actually enforced in KVM's software PTW. Generate a stage-2 permission fault if the stage-1 walk attempts to update the descriptor and its corresponding stage-2 translation lacks write permission. 
Fixes: bff8aa213dee ("KVM: arm64: Implement HW access flag management in stage-1 SW PTW") Reviewed-by: Marc Zyngier Link: https://msgid.link/20260108204230.677172-1-oupton@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 53bf70126f81..808d26bed182 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -403,6 +403,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va) { u64 va_top, va_bottom, baddr, desc, new_desc, ipa; + struct kvm_s2_trans s2_trans = {}; int level, stride, ret; level = wi->sl; @@ -420,8 +421,6 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ipa = baddr | index; if (wi->s2) { - struct kvm_s2_trans s2_trans = {}; - ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans); if (ret) { fail_s1_walk(wr, @@ -515,6 +514,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, new_desc |= PTE_AF; if (new_desc != desc) { + if (wi->s2 && !kvm_s2_trans_writable(&s2_trans)) { + fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), true); + return -EPERM; + } + ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi); if (ret) return ret; From 86364832ba6f2777db98391060b2d7f69938ad9b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 Jan 2026 12:46:00 +0000 Subject: [PATCH 09/10] KVM: arm64: Don't blindly set PSTATE.PAN on guest exit We set PSTATE.PAN to 1 on exiting from a guest if PAN support has been compiled in and that it exists on the HW. However, this is not necessarily correct. In a nVHE configuration, there is no notion of PAN at EL2, so setting PSTATE.PAN to anything is pointless. Furthermore, not setting PAN to 0 when CONFIG_ARM64_PAN isn't set means we run with the *guest's* PSTATE.PAN (which might be set to 1), and we will explode on the next userspace access. Yes, the architecture is delightful in that particular corner. 
Fix the whole thing by always setting PAN to something when running VHE (which implies PAN support), and only ignore it when running nVHE. Reported-by: Mark Rutland Signed-off-by: Marc Zyngier Link: https://msgid.link/20260107124600.2736328-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_asm.h | 2 ++ arch/arm64/include/asm/sysreg.h | 3 ++- arch/arm64/kernel/image-vars.h | 1 + arch/arm64/kvm/hyp/entry.S | 4 +++- arch/arm64/kvm/va_layout.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a1ad12c72ebf..ce516d8187b1 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -300,6 +300,8 @@ void kvm_get_kimage_voffset(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_final_ctr_el0(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_pan_patch_el2_entry(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9df51accbb02..106b15eb232a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -91,7 +91,8 @@ */ #define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) #define PSTATE_Imm_shift CRm_shift -#define SET_PSTATE(x, r) __emit_inst(0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift)) +#define ENCODE_PSTATE(x, r) (0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE(x, r) __emit_inst(ENCODE_PSTATE(x, r)) #define PSTATE_PAN pstate_field(0, 4) #define PSTATE_UAO pstate_field(0, 3) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 85bc629270bd..211f0e2e55e2 100644 --- 
a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -86,6 +86,7 @@ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_get_kimage_voffset); KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0); +KVM_NVHE_ALIAS(kvm_pan_patch_el2_entry); KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter); KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable); KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 9f4e8d68ab50..d1ccddf9e87d 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -126,7 +126,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) add x1, x1, #VCPU_CONTEXT - ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) + alternative_cb ARM64_ALWAYS_SYSTEM, kvm_pan_patch_el2_entry + nop + alternative_cb_end // Store the guest regs x2 and x3 stp x2, x3, [x1, #CPU_XREG_OFFSET(2)] diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 91b22a014610..bf888d150dc7 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -296,3 +296,31 @@ void kvm_compute_final_ctr_el0(struct alt_instr *alt, generate_mov_q(read_sanitised_ftr_reg(SYS_CTR_EL0), origptr, updptr, nr_inst); } + +void kvm_pan_patch_el2_entry(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + /* + * If we're running at EL1 without hVHE, then SCTLR_EL2.SPAN means + * nothing to us (it is RES1), and we don't need to set PSTATE.PAN + * to anything useful. + */ + if (!is_kernel_in_hyp_mode() && !cpus_have_cap(ARM64_KVM_HVHE)) + return; + + /* + * Leap of faith: at this point, we must be running VHE one way or + * another, and FEAT_PAN is required to be implemented. If KVM + * explodes at runtime because your system does not abide by this + * requirement, call your favourite HW vendor, they have screwed up. + * + * We don't expect hVHE to access any userspace mapping, so always + * set PSTATE.PAN on entry. 
Same thing if we have PAN enabled on an + * EL2 kernel. Only force it to 0 if we have not configured PAN in + * the kernel (and you know this is really silly). + */ + if (cpus_have_cap(ARM64_KVM_HVHE) || IS_ENABLED(CONFIG_ARM64_PAN)) + *updptr = cpu_to_le32(ENCODE_PSTATE(1, PAN)); + else + *updptr = cpu_to_le32(ENCODE_PSTATE(0, PAN)); +} From 19cffd16ed6489770272ba383ff3aaec077e01ed Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Jan 2026 15:49:09 +0000 Subject: [PATCH 10/10] KVM: arm64: Invert KVM_PGTABLE_WALK_HANDLE_FAULT to fix pKVM walkers Commit ddcadb297ce5 ("KVM: arm64: Ignore EAGAIN for walks outside of a fault") introduced a new walker flag ('KVM_PGTABLE_WALK_HANDLE_FAULT') to KVM's page-table code. When set, the walk logic maintains its previous behaviour of terminating a walk as soon as the visitor callback returns an error. However, when the flag is clear, the walk will continue if the visitor returns -EAGAIN and the error is then suppressed and returned as zero to the caller. Clearing the flag is beneficial when write-protecting a range of IPAs with kvm_pgtable_stage2_wrprotect() but is not useful in any other cases, either because we are operating on a single page (e.g. kvm_pgtable_stage2_mkyoung() or kvm_phys_addr_ioremap()) or because the early termination is desirable (e.g. when mapping pages from a fault in user_mem_abort()). Subsequently, commit e912efed485a ("KVM: arm64: Introduce the EL1 pKVM MMU") hooked up pKVM's hypercall interface to the MMU code at EL1 but failed to propagate any of the walker flags. As a result, page-table walks at EL2 fail to set KVM_PGTABLE_WALK_HANDLE_FAULT even when the early termination semantics are desirable on the fault handling path. Rather than complicate the pKVM hypercall interface, invert the flag so that the whole thing can be simplified and only pass the new flag ('KVM_PGTABLE_WALK_IGNORE_EAGAIN') from the wrprotect code. 
Cc: Fuad Tabba Cc: Quentin Perret Cc: Marc Zyngier Cc: Oliver Upton Reviewed-by: Marc Zyngier Fixes: fce886a60207 ("KVM: arm64: Plumb the pKVM MMU in KVM") Signed-off-by: Will Deacon Reviewed-by: Quentin Perret Link: https://msgid.link/20260105154939.11041-2-will@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_pgtable.h | 6 +++--- arch/arm64/kvm/hyp/pgtable.c | 5 +++-- arch/arm64/kvm/mmu.c | 8 +++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index be68b8969206..c0ad262a8289 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -301,8 +301,8 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, * children. * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared * with other software walkers. - * @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was - * invoked from a fault handler. + * @KVM_PGTABLE_WALK_IGNORE_EAGAIN: Don't terminate the walk early if + * the walker returns -EAGAIN. * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries * without Break-before-make's * TLB invalidation. @@ -315,7 +315,7 @@ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), KVM_PGTABLE_WALK_SHARED = BIT(3), - KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4), + KVM_PGTABLE_WALK_IGNORE_EAGAIN = BIT(4), KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5), KVM_PGTABLE_WALK_SKIP_CMO = BIT(6), }; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 947ac1a951a5..9abc0a6cf448 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -144,7 +144,7 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker, * page table walk. 
*/ if (r == -EAGAIN) - return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT); + return walker->flags & KVM_PGTABLE_WALK_IGNORE_EAGAIN; return !r; } @@ -1262,7 +1262,8 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { return stage2_update_leaf_attrs(pgt, addr, size, 0, KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, - NULL, NULL, 0); + NULL, NULL, + KVM_PGTABLE_WALK_IGNORE_EAGAIN); } void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 124404eb208d..2caa97f87890 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1563,14 +1563,12 @@ static void adjust_nested_exec_perms(struct kvm *kvm, *prot &= ~KVM_PGTABLE_PROT_PX; } -#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED) - static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, bool is_perm) { bool write_fault, exec_fault, writable; - enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt; unsigned long mmu_seq; @@ -1665,7 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_pgtable *pgt; struct page *page; vm_flags_t vm_flags; - enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS; + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED; if (fault_is_perm) fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu); @@ -1933,7 +1931,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, /* Resolve the access fault by making the page young again. 
*/ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) { - enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED; struct kvm_s2_mmu *mmu; trace_kvm_access_fault(fault_ipa);