diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ca266a4d9d50..0e57dc1881e0 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -18,6 +18,7 @@ #include #include #include +#include #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP) @@ -612,6 +613,39 @@ static int host_stage2_idmap(u64 addr) return ret; } +static void host_inject_mem_abort(struct kvm_cpu_context *host_ctxt) +{ + u64 ec, esr, spsr; + + esr = read_sysreg_el2(SYS_ESR); + spsr = read_sysreg_el2(SYS_SPSR); + + /* Repaint the ESR to report a same-level fault if taken from EL1 */ + if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) { + ec = ESR_ELx_EC(esr); + if (ec == ESR_ELx_EC_DABT_LOW) + ec = ESR_ELx_EC_DABT_CUR; + else if (ec == ESR_ELx_EC_IABT_LOW) + ec = ESR_ELx_EC_IABT_CUR; + else + WARN_ON(1); + esr &= ~ESR_ELx_EC_MASK; + esr |= ec << ESR_ELx_EC_SHIFT; + } + + /* + * Since S1PTW should only ever be set for stage-2 faults, we're pretty + * much guaranteed that it won't be set in ESR_EL1 by the hardware. So, + * let's use that bit to allow the host abort handler to differentiate + * this abort from normal userspace faults. + * + * Note: although S1PTW is RES0 at EL1, it is guaranteed by the + * architecture to be backed by flops, so it should be safe to use. + */ + esr |= ESR_ELx_S1PTW; + inject_host_exception(esr); +} + void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; @@ -635,6 +669,9 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; switch (host_stage2_idmap(addr)) { + case -EPERM: + host_inject_mem_abort(host_ctxt); + fallthrough; case -EEXIST: case 0: break; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index be9dab2c7d6a..3abfc7272d63 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -43,6 +43,7 @@ #include #include #include +#include struct fault_info { int (*fn)(unsigned long far, unsigned long esr, @@ -269,6 +270,15 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr return false; } +static bool is_pkvm_stage2_abort(unsigned int esr) +{ + /* + * S1PTW should only ever be set in ESR_EL1 if the pkvm hypervisor + * injected a stage-2 abort -- see host_inject_mem_abort(). + */ + return is_pkvm_initialized() && (esr & ESR_ELx_S1PTW); +} + static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, unsigned long esr, struct pt_regs *regs) @@ -279,6 +289,9 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, if (!is_el1_data_abort(esr) || !esr_fsc_is_translation_fault(esr)) return false; + if (is_pkvm_stage2_abort(esr)) + return false; + local_irq_save(flags); asm volatile("at s1e1r, %0" :: "r" (addr)); isb(); @@ -395,6 +408,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "read from unreadable memory"; } else if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; + } else if (is_pkvm_stage2_abort(esr)) { + msg = "access to hypervisor-protected memory"; } else { if (esr_fsc_is_translation_fault(esr) && kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) @@ -621,6 +636,13 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, addr, esr, regs); } + if (is_pkvm_stage2_abort(esr)) { + if (!user_mode(regs)) + goto no_context; + arm64_force_sig_fault(SIGSEGV, SEGV_ACCERR, far, "stage-2 fault"); + return 0; + } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (!(mm_flags & FAULT_FLAG_USER))