linux/arch/arm64/kvm/hyp/nvhe/hyp-main.c
Marc Zyngier f05799491d KVM: arm64: pkvm: Adopt MARKER() to define host hypercall ranges
The EL2 code defines ranges of host hypercalls that are either
enabled at boot-time only, used by [nh]VHE KVM, or reserved to pKVM.

The way these ranges are delineated is error-prone, as the enum symbols
defining the limits are expressed in terms of actual function symbols.
This means that should a new function be added, special care must be
taken to also update the limit symbol.
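
For illustration (hypothetical enumerator names, a sketch of the failure
mode rather than the tree's actual definitions), the fragile pattern looks
like this: the limit aliases whichever function symbol currently happens
to be last in its range, so appending an entry silently invalidates it:

	enum host_hcall_id {
		HCALL_FOO,				/* boot-time only */
		HCALL_BAR,				/* boot-time only */
		HCALL_NEW,				/* just added...		 */
		HCALL_LIMIT_EARLY = HCALL_BAR + 1,	/* ...stale: a range check such	 */
							/* as "id < HCALL_LIMIT_EARLY"	 */
							/* now wrongly rejects HCALL_NEW */
	};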

Improve this by reusing the mechanism introduced for the vcpu_sysreg
enum, which uses a MARKER() macro and some extra trickery to make
the limit symbol standalone. Crucially, the limit symbol has the
same value as the *following* symbol.

The handle_host_hcall() function is then updated to make use of
the new limit definitions and get rid of the brittle default
upper limit. This allows for stricter checks at build time and
the removal of a comparison at run time.
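
As a sketch of the mechanism (the dummy-identifier spelling and the
enumerator names below are illustrative; only the idea is taken from the
vcpu_sysreg enum), MARKER() emits the limit enumerator followed by a
hidden enumerator one below it, rewinding the implicit counter so that
the *next* real entry gets the same value as the marker:

	#define MARKER(m)	m, __##m##_rewind = (m) - 1

	enum host_hcall_id {
		HCALL_FOO,			/* 0 */
		HCALL_BAR,			/* 1 */
		MARKER(HCALL_LIMIT_EARLY),	/* 2, plus a hidden entry = 1 */
		HCALL_BAZ,			/* 2 again: same as the marker */
		HCALL_QUUX,			/* 3 */
	};

	_Static_assert(HCALL_LIMIT_EARLY == HCALL_BAZ,
		       "the limit aliases the entry that follows it");

New entries can then be added on either side of the marker without
touching it, and handle_host_hcall() can range-check against the marker
values directly, backed by BUILD_BUG_ON() guards at compile time.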

Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Link: https://patch.msgid.link/20260414160528.2218858-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2026-04-18 09:07:13 +01:00


// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020 - Google Inc
* Author: Andrew Scull <ascull@google.com>
*/
#include <hyp/adjust_pc.h>
#include <hyp/switch.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_hypevents.h>
#include <asm/kvm_mmu.h>
#include <nvhe/ffa.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/pkvm.h>
#include <nvhe/trace.h>
#include <nvhe/trap_handler.h>
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
{
__vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR));
/*
* On saving/restoring guest sve state, always use the maximum VL for
* the guest. The layout of the data when saving the sve state depends
* on the VL, so use a consistent (i.e., the maximum) guest VL.
*/
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
}
static void __hyp_sve_restore_host(void)
{
struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
/*
* On saving/restoring host sve state, always use the maximum VL for
* the host. The layout of the data when saving the sve state depends
* on the VL, so use a consistent (i.e., the maximum) host VL.
*
* Note that this constrains the PE to the maximum shared VL
* that was discovered, if we wish to use larger VLs this will
* need to be revisited.
*/
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
&sve_state->fpsr,
true);
write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
}
static void fpsimd_sve_flush(void)
{
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
{
bool has_fpmr;
if (!guest_owns_fp_regs())
return;
/*
* Traps have been disabled by __deactivate_cptr_traps(), but there
* hasn't necessarily been a context synchronization event yet.
*/
isb();
if (vcpu_has_sve(vcpu))
__hyp_sve_save_guest(vcpu);
else
__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
if (has_fpmr)
__vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR));
if (system_supports_sve())
__hyp_sve_restore_host();
else
__fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs));
if (has_fpmr)
write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner;
if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state;
else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state;
}
static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state;
else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state;
}
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
fpsimd_sve_flush();
flush_debug_state(hyp_vcpu);
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
(HCR_TWI | HCR_TWE);
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
hyp_vcpu->vcpu.arch.pid = host_vcpu->arch.pid;
}
static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
struct vgic_v3_cpu_if *hyp_cpu_if = &hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3;
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
unsigned int i;
fpsimd_sve_sync(&hyp_vcpu->vcpu);
sync_debug_state(hyp_vcpu);
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
host_cpu_if->vgic_vmcr = hyp_cpu_if->vgic_vmcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
}
static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2);
DECLARE_REG(u64, hcr_el2, host_ctxt, 3);
struct pkvm_hyp_vcpu *hyp_vcpu;
hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
if (!hyp_vcpu)
return;
if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
/* Propagate WFx trapping flags */
hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI);
hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI);
} else {
memcpy(&hyp_vcpu->vcpu.arch.fgt, hyp_vcpu->host_vcpu->arch.fgt,
sizeof(hyp_vcpu->vcpu.arch.fgt));
}
}
static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
{
struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
if (hyp_vcpu)
pkvm_put_hyp_vcpu(hyp_vcpu);
}
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
int ret;
if (unlikely(is_protected_kvm_enabled())) {
struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
/*
* KVM (and pKVM) doesn't support SME guests for now, and
* ensures that SME features aren't enabled in pstate when
* loading a vcpu. Therefore, if SME features are enabled, the host
* is misbehaving.
*/
if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
ret = -EINVAL;
goto out;
}
if (!hyp_vcpu) {
ret = -EINVAL;
goto out;
}
flush_hyp_vcpu(hyp_vcpu);
ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
sync_hyp_vcpu(hyp_vcpu);
} else {
struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu);
/* The host is fully trusted, run its vCPU directly. */
fpsimd_lazy_switch_to_guest(vcpu);
ret = __kvm_vcpu_run(vcpu);
fpsimd_lazy_switch_to_host(vcpu);
}
out:
cpu_reg(host_ctxt, 1) = ret;
}
static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
return refill_memcache(&hyp_vcpu->vcpu.arch.pkvm_memcache,
host_vcpu->arch.pkvm_memcache.nr_pages,
&host_vcpu->arch.pkvm_memcache);
}
static void handle___pkvm_host_donate_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
struct pkvm_hyp_vcpu *hyp_vcpu;
int ret = -EINVAL;
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
if (!hyp_vcpu || !pkvm_hyp_vcpu_is_protected(hyp_vcpu))
goto out;
ret = pkvm_refill_memcache(hyp_vcpu);
if (ret)
goto out;
ret = __pkvm_host_donate_guest(pfn, gfn, hyp_vcpu);
out:
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
DECLARE_REG(u64, nr_pages, host_ctxt, 3);
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
struct pkvm_hyp_vcpu *hyp_vcpu;
int ret = -EINVAL;
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
goto out;
ret = pkvm_refill_memcache(hyp_vcpu);
if (ret)
goto out;
ret = __pkvm_host_share_guest(pfn, gfn, nr_pages, hyp_vcpu, prot);
out:
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
DECLARE_REG(u64, nr_pages, host_ctxt, 3);
struct pkvm_hyp_vm *hyp_vm;
int ret = -EINVAL;
hyp_vm = get_np_pkvm_hyp_vm(handle);
if (!hyp_vm)
goto out;
ret = __pkvm_host_unshare_guest(gfn, nr_pages, hyp_vm);
put_pkvm_hyp_vm(hyp_vm);
out:
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, gfn, host_ctxt, 1);
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 2);
struct pkvm_hyp_vcpu *hyp_vcpu;
int ret = -EINVAL;
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
goto out;
ret = __pkvm_host_relax_perms_guest(gfn, hyp_vcpu, prot);
out:
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
DECLARE_REG(u64, nr_pages, host_ctxt, 3);
struct pkvm_hyp_vm *hyp_vm;
int ret = -EINVAL;
hyp_vm = get_np_pkvm_hyp_vm(handle);
if (!hyp_vm)
goto out;
ret = __pkvm_host_wrprotect_guest(gfn, nr_pages, hyp_vm);
put_pkvm_hyp_vm(hyp_vm);
out:
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
DECLARE_REG(u64, nr_pages, host_ctxt, 3);
DECLARE_REG(bool, mkold, host_ctxt, 4);
struct pkvm_hyp_vm *hyp_vm;
int ret = -EINVAL;
hyp_vm = get_np_pkvm_hyp_vm(handle);
if (!hyp_vm)
goto out;
ret = __pkvm_host_test_clear_young_guest(gfn, nr_pages, mkold, hyp_vm);
put_pkvm_hyp_vm(hyp_vm);
out:
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, gfn, host_ctxt, 1);
struct pkvm_hyp_vcpu *hyp_vcpu;
int ret = -EINVAL;
hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
goto out;
ret = __pkvm_host_mkyoung_guest(gfn, hyp_vcpu);
out:
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
__kvm_adjust_pc(kern_hyp_va(vcpu));
}
static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
{
__kvm_flush_vm_context();
}
static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
DECLARE_REG(int, level, host_ctxt, 3);
__kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
}
static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
DECLARE_REG(int, level, host_ctxt, 3);
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}
static void
handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
DECLARE_REG(unsigned long, pages, host_ctxt, 3);
__kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
}
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
}
static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
struct pkvm_hyp_vm *hyp_vm = get_np_pkvm_hyp_vm(handle);
if (!hyp_vm)
return;
__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
put_pkvm_hyp_vm(hyp_vm);
}
static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
__kvm_flush_cpu_context(kern_hyp_va(mmu));
}
static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
{
__kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1));
}
static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
{
u64 tmp;
tmp = read_sysreg_el2(SYS_SCTLR);
tmp |= SCTLR_ELx_DSSBS;
write_sysreg_el2(tmp, SYS_SCTLR);
}
static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_init_lrs();
}
static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
__vgic_v3_save_aprs(kern_hyp_va(cpu_if));
}
static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
__vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if));
}
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
DECLARE_REG(unsigned long, size, host_ctxt, 2);
DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 3);
DECLARE_REG(u32, hyp_va_bits, host_ctxt, 4);
/*
* __pkvm_init() will return only if an error occurred, otherwise it
* will tail-call into __pkvm_init_finalise(), which will have to deal
* with the host context directly.
*/
cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, per_cpu_base, hyp_va_bits);
}
static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(enum arm64_hyp_spectre_vector, slot, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot);
}
static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
}
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
DECLARE_REG(size_t, size, host_ctxt, 2);
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
/*
* __pkvm_create_private_mapping() populates a pointer with the
* hypervisor start address of the allocation.
*
* However, the handle___pkvm_create_private_mapping() hypercall crosses the
* EL1/EL2 boundary, so the pointer would not be valid in this context.
*
* Instead pass the allocation address as the return value (or return
* ERR_PTR() on failure).
*/
unsigned long haddr;
int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);
if (err)
haddr = (unsigned long)ERR_PTR(err);
cpu_reg(host_ctxt, 1) = haddr;
}
static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm();
}
static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
__pkvm_unreserve_vm(handle);
}
static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2);
DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3);
host_kvm = kern_hyp_va(host_kvm);
cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
}
static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2);
DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3);
host_vcpu = kern_hyp_va(host_vcpu);
cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
}
static void handle___pkvm_vcpu_in_poison_fault(struct kvm_cpu_context *host_ctxt)
{
int ret;
struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
ret = hyp_vcpu ? __pkvm_vcpu_in_poison_fault(hyp_vcpu) : -EINVAL;
cpu_reg(host_ctxt, 1) = ret;
}
static void handle___pkvm_force_reclaim_guest_page(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_host_force_reclaim_page_guest(phys);
}
static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, gfn);
}
static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_start_teardown_vm(handle);
}
static void handle___pkvm_finalize_teardown_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __pkvm_finalize_teardown_vm(handle);
}
static void handle___tracing_load(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1);
DECLARE_REG(size_t, desc_size, host_ctxt, 2);
cpu_reg(host_ctxt, 1) = __tracing_load(desc_hva, desc_size);
}
static void handle___tracing_unload(struct kvm_cpu_context *host_ctxt)
{
__tracing_unload();
}
static void handle___tracing_enable(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(bool, enable, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __tracing_enable(enable);
}
static void handle___tracing_swap_reader(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __tracing_swap_reader(cpu);
}
static void handle___tracing_update_clock(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u32, mult, host_ctxt, 1);
DECLARE_REG(u32, shift, host_ctxt, 2);
DECLARE_REG(u64, epoch_ns, host_ctxt, 3);
DECLARE_REG(u64, epoch_cyc, host_ctxt, 4);
__tracing_update_clock(mult, shift, epoch_ns, epoch_cyc);
}
static void handle___tracing_reset(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
cpu_reg(host_ctxt, 1) = __tracing_reset(cpu);
}
static void handle___tracing_enable_event(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned short, id, host_ctxt, 1);
DECLARE_REG(bool, enable, host_ctxt, 2);
cpu_reg(host_ctxt, 1) = __tracing_enable_event(id, enable);
}
static void handle___tracing_write_event(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, id, host_ctxt, 1);
trace_selftest(id);
}
static void handle___vgic_v5_save_apr(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
__vgic_v5_save_apr(kern_hyp_va(cpu_if));
}
static void handle___vgic_v5_restore_vmcr_apr(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
__vgic_v5_restore_vmcr_apr(kern_hyp_va(cpu_if));
}
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
static const hcall_t host_hcall[] = {
/* ___kvm_hyp_init */
HANDLE_FUNC(__pkvm_init),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_cpu_set_vector),
HANDLE_FUNC(__kvm_enable_ssbs),
HANDLE_FUNC(__vgic_v3_init_lrs),
HANDLE_FUNC(__vgic_v3_get_gic_config),
HANDLE_FUNC(__pkvm_prot_finalize),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__tracing_load),
HANDLE_FUNC(__tracing_unload),
HANDLE_FUNC(__tracing_enable),
HANDLE_FUNC(__tracing_swap_reader),
HANDLE_FUNC(__tracing_update_clock),
HANDLE_FUNC(__tracing_reset),
HANDLE_FUNC(__tracing_enable_event),
HANDLE_FUNC(__tracing_write_event),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__vgic_v5_save_apr),
HANDLE_FUNC(__vgic_v5_restore_vmcr_apr),
HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__pkvm_host_donate_guest),
HANDLE_FUNC(__pkvm_host_share_guest),
HANDLE_FUNC(__pkvm_host_unshare_guest),
HANDLE_FUNC(__pkvm_host_relax_perms_guest),
HANDLE_FUNC(__pkvm_host_wrprotect_guest),
HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
HANDLE_FUNC(__pkvm_host_mkyoung_guest),
HANDLE_FUNC(__pkvm_reserve_vm),
HANDLE_FUNC(__pkvm_unreserve_vm),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_vcpu_in_poison_fault),
HANDLE_FUNC(__pkvm_force_reclaim_guest_page),
HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
HANDLE_FUNC(__pkvm_start_teardown_vm),
HANDLE_FUNC(__pkvm_finalize_teardown_vm),
HANDLE_FUNC(__pkvm_vcpu_load),
HANDLE_FUNC(__pkvm_vcpu_put),
HANDLE_FUNC(__pkvm_tlb_flush_vmid),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
unsigned long hcall_min = 0, hcall_max = __KVM_HOST_SMCCC_FUNC_MAX;
hcall_t hfn;
BUILD_BUG_ON(ARRAY_SIZE(host_hcall) != __KVM_HOST_SMCCC_FUNC_MAX);
/*
* If pKVM has been initialised then reject any calls to the
* early "privileged" hypercalls. Note that we cannot reject
* calls to __pkvm_prot_finalize for two reasons: (1) The static
* key used to determine initialisation must be toggled prior to
* finalisation and (2) finalisation is performed on a per-CPU
* basis. This is all fine, however, since __pkvm_prot_finalize
* returns -EPERM after the first call for a given CPU.
*/
if (static_branch_unlikely(&kvm_protected_mode_initialized)) {
hcall_min = __KVM_HOST_SMCCC_FUNC_MIN_PKVM;
} else {
hcall_max = __KVM_HOST_SMCCC_FUNC_PKVM_ONLY;
}
id &= ~ARM_SMCCC_CALL_HINTS;
id -= KVM_HOST_SMCCC_ID(0);
if (unlikely(id < hcall_min || id >= hcall_max))
goto inval;
hfn = host_hcall[id];
if (unlikely(!hfn))
goto inval;
cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
hfn(host_ctxt);
return;
inval:
cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
}
static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
{
trace_hyp_exit(host_ctxt, HYP_REASON_SMC);
__kvm_hyp_host_forward_smc(host_ctxt);
trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
}
static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, func_id, host_ctxt, 0);
u64 esr = read_sysreg_el2(SYS_ESR);
bool handled;
if (esr & ESR_ELx_xVC_IMM_MASK) {
cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
goto exit_skip_instr;
}
func_id &= ~ARM_SMCCC_CALL_HINTS;
handled = kvm_host_psci_handler(host_ctxt, func_id);
if (!handled)
handled = kvm_host_ffa_handler(host_ctxt, func_id);
if (!handled)
default_host_smc_handler(host_ctxt);
exit_skip_instr:
/* SMC was trapped, move ELR past the current PC. */
kvm_skip_host_instr();
}
void inject_host_exception(u64 esr)
{
u64 sctlr, spsr_el1, spsr_el2, exc_offset = except_type_sync;
const u64 spsr_mask = PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT |
PSR_V_BIT | PSR_DIT_BIT | PSR_PAN_BIT;
spsr_el1 = spsr_el2 = read_sysreg_el2(SYS_SPSR);
switch (spsr_el1 & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
case PSR_MODE_EL0t:
exc_offset += LOWER_EL_AArch64_VECTOR;
break;
case PSR_MODE_EL0t | PSR_MODE32_BIT:
exc_offset += LOWER_EL_AArch32_VECTOR;
break;
default:
exc_offset += CURRENT_EL_SP_ELx_VECTOR;
}
spsr_el2 &= spsr_mask;
spsr_el2 |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT |
PSR_MODE_EL1h;
sctlr = read_sysreg_el1(SYS_SCTLR);
if (!(sctlr & SCTLR_EL1_SPAN))
spsr_el2 |= PSR_PAN_BIT;
if (sctlr & SCTLR_ELx_DSSBS)
spsr_el2 |= PSR_SSBS_BIT;
if (system_supports_mte())
spsr_el2 |= PSR_TCO_BIT;
if (esr_fsc_is_translation_fault(esr))
write_sysreg_el1(read_sysreg_el2(SYS_FAR), SYS_FAR);
write_sysreg_el1(esr, SYS_ESR);
write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
write_sysreg_el1(spsr_el1, SYS_SPSR);
write_sysreg_el2(read_sysreg_el1(SYS_VBAR) + exc_offset, SYS_ELR);
write_sysreg_el2(spsr_el2, SYS_SPSR);
}
static void inject_host_undef64(void)
{
inject_host_exception((ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT) |
ESR_ELx_IL);
}
static bool handle_host_mte(u64 esr)
{
switch (esr_sys64_to_sysreg(esr)) {
case SYS_RGSR_EL1:
case SYS_GCR_EL1:
case SYS_TFSR_EL1:
case SYS_TFSRE0_EL1:
/* If we're here for any reason other than MTE, it's a bug. */
if (read_sysreg(HCR_EL2) & HCR_ATA)
return false;
break;
case SYS_GMID_EL1:
/* If we're here for any reason other than MTE, it's a bug. */
if (!(read_sysreg(HCR_EL2) & HCR_TID5))
return false;
break;
default:
return false;
}
inject_host_undef64();
return true;
}
void handle_trap(struct kvm_cpu_context *host_ctxt)
{
u64 esr = read_sysreg_el2(SYS_ESR);
switch (ESR_ELx_EC(esr)) {
case ESR_ELx_EC_HVC64:
trace_hyp_enter(host_ctxt, HYP_REASON_HVC);
handle_host_hcall(host_ctxt);
break;
case ESR_ELx_EC_SMC64:
trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:
trace_hyp_enter(host_ctxt, HYP_REASON_HOST_ABORT);
handle_host_mem_abort(host_ctxt);
break;
case ESR_ELx_EC_SYS64:
if (handle_host_mte(esr))
break;
fallthrough;
default:
BUG();
}
trace_hyp_exit(host_ctxt, HYP_REASON_ERET_HOST);
}