From 87f842c6c6543cf0dd66161fdf4b62cec804479b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 11:16:56 +0000 Subject: [PATCH 1/6] KVM: arm64: Add accessor for per-CPU state In order to facilitate the introduction of new per-CPU state, add a new host_data_ptr() helped that hides some of the per-CPU verbosity, and make it easier to move that state around in the future. Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 37 +++++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 +-- arch/arm64/kvm/hyp/include/hyp/switch.h | 8 ++--- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 2 +- arch/arm64/kvm/hyp/nvhe/setup.c | 3 +- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +-- arch/arm64/kvm/hyp/vhe/switch.c | 4 +-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 +-- arch/arm64/kvm/pmu.c | 2 +- 10 files changed, 53 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9e8a496fb284..f41db42529df 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -530,6 +530,17 @@ struct kvm_cpu_context { u64 *vncr_array; }; +/* + * This structure is instantiated on a per-CPU basis, and contains + * data that is: + * + * - tied to a single physical CPU, and + * - either have a lifetime that does not extend past vcpu_put() + * - or is an invariant for the lifetime of the system + * + * Use host_data_ptr(field) as a way to access a pointer to such a + * field. + */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; }; @@ -1168,6 +1179,32 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); +/* + * How we access per-CPU host data depends on the where we access it from, + * and the mode we're in: + * + * - VHE and nVHE hypervisor bits use their locally defined instance + * + * - the rest of the kernel use either the VHE or nVHE one, depending on + * the mode we're running in. + * + * Unless we're in protected mode, fully deprivileged, and the nVHE + * per-CPU stuff is exclusively accessible to the protected EL2 code. + * In this case, the EL1 code uses the *VHE* data as its private state + * (which makes sense in a way as there shouldn't be any shared state + * between the host and the hypervisor). + * + * Yes, this is all totally trivial. Shoot me now. + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) +#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f) +#else +#define host_data_ptr(f) \ + (static_branch_unlikely(&kvm_protected_mode_initialized) ? \ + &this_cpu_ptr(&kvm_host_data)->f : \ + &this_cpu_ptr_hyp_sym(kvm_host_data)->f) +#endif + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3dee5490eea9..a24287c3ba99 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1971,7 +1971,7 @@ static void cpu_set_hyp_vector(void) static void cpu_hyp_init_context(void) { - kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); + kvm_init_host_cpu_context(host_data_ptr(host_ctxt)); if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 961bbef104a6..eec0f8ccda56 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -135,7 +135,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); @@ -154,7 +154,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index e3fcf8c4d5b4..ae198b84ca01 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -155,7 +155,7 @@ static inline bool cpu_has_amu(void) static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); struct kvm *kvm = kern_hyp_va(vcpu->kvm); CHECK_FGT_MASKS(HFGRTR_EL2); @@ -191,7 +191,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) @@ -226,7 +226,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -260,7 +260,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) if (kvm_arm_support_pmu_v3()) { struct kvm_cpu_context *hctxt; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index d57bcb6ab94d..dfe8fe0f7eaf 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); if (is_cpu_on) boot_args = this_cpu_ptr(&cpu_on_args); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bc58d1b515af..ae00dfa80801 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -257,8 +257,7 @@ static int fix_hyp_pgtable_refcnt(void) void __noreturn __pkvm_init_finalise(void) { - struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); - struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt); unsigned long nr_pages, reserved_pages, pfn; int ret; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c50f8459e4fc..544a419b9a39 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -264,7 +264,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -367,7 +367,7 @@ asmlinkage void __noreturn hyp_panic(void) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; if (vcpu) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1581df6aec87..14b7a6bc5909 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -221,7 +221,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -306,7 +306,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; __deactivate_traps(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index a8b9ea496706..e12bd7d6d2dc 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -67,7 +67,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_user_state(host_ctxt); /* @@ -110,7 +110,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_el1_state(guest_ctxt); __sysreg_save_user_state(guest_ctxt); diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index a243934c5568..329819806096 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val) if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) return false; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; return true; } From 6db55734ec4008da39e10d2fffa913fd9751ccaa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Feb 2024 15:58:46 +0000 Subject: [PATCH 2/6] KVM: arm64: Exclude host_debug_data from vcpu_arch Keeping host_debug_state on a per-vcpu basis is completely pointless. The lifetime of this data is only that of the inner run-loop, which means it is never accessed outside of the core EL2 code. Move the structure into kvm_host_data, and save over 500 bytes per vcpu. Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 31 +++++++++++++---------- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 +-- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 8 +++--- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f41db42529df..d7bcb8ce1d7b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -543,6 +543,19 @@ struct kvm_cpu_context { */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + + /* + * host_debug_state contains the host registers which are + * saved and restored during world switches. + */ + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; + } host_debug_state; }; struct kvm_host_psci_config { @@ -638,11 +651,10 @@ struct kvm_vcpu_arch { * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and * guest state during world switches. vcpu_debug_state are the debug - * registers of the vcpu as the guest sees them. host_debug_state are - * the host registers which are saved and restored during - * world switches. external_debug_state contains the debug - * values we want to debug the guest. This is set via the - * KVM_SET_GUEST_DEBUG ioctl. + * registers of the vcpu as the guest sees them. + * + * external_debug_state contains the debug values we want to debug the + * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. * * debug_ptr points to the set of debug registers that should be loaded * onto the hardware when running the guest. @@ -654,15 +666,6 @@ struct kvm_vcpu_arch { struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ struct task_struct *parent_task; - struct { - /* {Break,watch}point registers */ - struct kvm_guest_debug_arch regs; - /* Statistical profiling extension */ - u64 pmscr_el1; - /* Self-hosted trace */ - u64 trfcr_el1; - } host_debug_state; - /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index eec0f8ccda56..d00093699aaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -137,7 +137,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(host_dbg, host_ctxt); @@ -156,7 +156,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(guest_dbg, guest_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7746ea507b6f..53efda0235cf 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); /* Disable and flush Self-Hosted Trace generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); + __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); + __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) From 4bacd723705a6b6c8386daf3d5148aca66135f3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Feb 2024 08:27:54 +0000 Subject: [PATCH 3/6] KVM: arm64: Exclude mdcr_el2_host from kvm_vcpu_arch As for the rest of the host debug state, the host copy of mdcr_el2 has little to do in the vcpu, and is better placed in the host_data structure. Reviewed-by : Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 5 ++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d7bcb8ce1d7b..a04e69fb2884 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -555,6 +555,8 @@ struct kvm_host_data { u64 pmscr_el1; /* Self-hosted trace */ u64 trfcr_el1; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2; } host_debug_state; }; @@ -616,9 +618,6 @@ struct kvm_vcpu_arch { u64 mdcr_el2; u64 cptr_el2; - /* Values of trap registers for the host before guest entry. */ - u64 mdcr_el2_host; - /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index ae198b84ca01..7d7de0245ed0 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -232,7 +232,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } - vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_HCX)) { @@ -254,7 +254,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { - write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) { From 51e09b5572d665645ce394f94f24a7d6ec32bda9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 12:06:44 +0000 Subject: [PATCH 4/6] KVM: arm64: Exclude host_fpsimd_state pointer from kvm_vcpu_arch As the name of the field indicates, host_fpsimd_state is strictly a host piece of data, and we reset this pointer on each PID change. So let's move it where it belongs, and set it at load-time. Although this is slightly more often, it is a well defined life-cycle which matches other pieces of data. Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/fpsimd.c | 3 +-- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 1 - 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a04e69fb2884..21730d5ac006 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -543,6 +543,7 @@ struct kvm_cpu_context { */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + struct user_fpsimd_state *fpsimd_state; /* hyp VA */ /* * host_debug_state contains the host registers which are @@ -662,7 +663,6 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ struct task_struct *parent_task; /* VGIC state */ diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 826307e19e3a..d30dffc800b6 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -49,8 +49,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) if (ret) return ret; - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); - /* * We need to keep current's task_struct pinned until its data has been * unshared with the hypervisor to make sure it is not re-used by the @@ -87,6 +85,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * FP_STATE_FREE if the flag set. */ vcpu->arch.fp_state = FP_STATE_HOST_OWNED; + *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 7d7de0245ed0..6def6ad8dd48 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -377,7 +377,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Write out the host state if it's in the registers */ if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ if (sve_guest) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2385fd03ed87..c5f625dc1f07 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -42,7 +42,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); - hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; From 5294afdbf45aced5295fe5941c58b40c41c23800 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 17:42:31 +0000 Subject: [PATCH 5/6] KVM: arm64: Exclude FP ownership from kvm_vcpu_arch In retrospect, it is fairly obvious that the FP state ownership is only meaningful for a given CPU, and that locating this information in the vcpu was just a mistake. Move the ownership tracking into the host data structure, and rename it from fp_state to fp_owner, which is a better description (name suggested by Mark Brown). Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 4 ++-- arch/arm64/include/asm/kvm_host.h | 14 +++++++------- arch/arm64/kvm/arm.c | 6 ------ arch/arm64/kvm/fpsimd.c | 10 +++++----- arch/arm64/kvm/hyp/include/hyp/switch.h | 6 +++--- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 -- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 8 files changed, 19 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 975af30af31f..3d65d9413608 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -588,7 +588,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); if (!vcpu_has_sve(vcpu) || - (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) != FP_STATE_GUEST_OWNED)) val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; @@ -596,7 +596,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = CPTR_NVHE_EL2_RES1; if (vcpu_has_sve(vcpu) && - (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED)) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 21730d5ac006..2b63fdfad5b2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -545,6 +545,13 @@ struct kvm_host_data { struct kvm_cpu_context host_ctxt; struct user_fpsimd_state *fpsimd_state; /* hyp VA */ + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_owner; + /* * host_debug_state contains the host registers which are * saved and restored during world switches. @@ -622,13 +629,6 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Ownership of the FP regs */ - enum { - FP_STATE_FREE, - FP_STATE_HOST_OWNED, - FP_STATE_GUEST_OWNED, - } fp_state; - /* Configuration flags, set once and for all before the vcpu can run */ u8 cflags; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a24287c3ba99..66d8112da268 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -378,12 +378,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; - /* - * Default value for the FP state, will be overloaded at load - * time if we support FP (pretty likely) - */ - vcpu->arch.fp_state = FP_STATE_FREE; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index d30dffc800b6..7507dcc4e553 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -84,7 +84,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * guest in kvm_arch_vcpu_ctxflush_fp() and override this to * FP_STATE_FREE if the flag set. */ - vcpu->arch.fp_state = FP_STATE_HOST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); @@ -109,7 +109,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * been saved, this is very unlikely to happen. */ if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; fpsimd_save_and_flush_cpu_state(); } } @@ -125,7 +125,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) { if (test_thread_flag(TIF_FOREIGN_FPSTATE)) - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; } /* @@ -141,7 +141,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { /* * Currently we do not support SME guests so SVCR is @@ -195,7 +195,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) isb(); } - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 6def6ad8dd48..2629420d0659 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -42,7 +42,7 @@ extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs are owned by the guest */ static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) { - return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED; + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; } /* Save the 32-bit only FPSIMD system register state */ @@ -376,7 +376,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED) __fpsimd_save_state(*host_data_ptr(fpsimd_state)); /* Restore the guest state */ @@ -389,7 +389,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) if (!(read_sysreg(hcr_el2) & HCR_RW)) write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); - vcpu->arch.fp_state = FP_STATE_GUEST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index c5f625dc1f07..26561c562f7a 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -39,7 +39,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; - hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); @@ -63,7 +62,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; - host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; for (i = 0; i < hyp_cpu_if->used_lrs; ++i) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 544a419b9a39..1f82d531a494 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -337,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 14b7a6bc5909..b92f9fe2d50e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -258,7 +258,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_host_state_vhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); From 9a39359903fea9c354d89dce81ffd952859c90dc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 May 2024 16:40:30 +0100 Subject: [PATCH 6/6] KVM: arm64: Move management of __hyp_running_vcpu to load/put on VHE The per-CPU host context structure contains a __hyp_running_vcpu that serves as a replacement for kvm_get_current_vcpu() in contexts where we cannot make direct use of it (such as in the nVHE hypervisor). Since there is a lot of common code between nVHE and VHE, the latter also populates this field even if kvm_get_running_vcpu() always works. We currently pretty inconsistent when populating __hyp_running_vcpu to point to the currently running vcpu: - on {n,h}VHE, we set __hyp_running_vcpu on entry to __kvm_vcpu_run and clear it on exit. - on VHE, we set __hyp_running_vcpu on entry to __kvm_vcpu_run_vhe and never clear it, effectively leaving a dangling pointer... VHE is obviously the odd one here. Although we could make it behave just like nVHE, this wouldn't match the behaviour of KVM with VHE, where the load phase is where most of the context-switch gets done. So move all the __hyp_running_vcpu management to the VHE-specific load/put phases, giving us a bit more sanity and matching the behaviour of kvm_get_running_vcpu(). Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240502154030.3011995-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/vhe/switch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b92f9fe2d50e..0027cc9b985e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -162,6 +162,8 @@ static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu) void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu) { + host_data_ptr(host_ctxt)->__hyp_running_vcpu = vcpu; + __vcpu_load_switch_sysregs(vcpu); __vcpu_load_activate_traps(vcpu); __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); @@ -171,6 +173,8 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu) { __vcpu_put_deactivate_traps(vcpu); __vcpu_put_switch_sysregs(vcpu); + + host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL; } static const exit_handler_fn hyp_exit_handlers[] = { @@ -222,7 +226,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) u64 exit_code; host_ctxt = host_data_ptr(host_ctxt); - host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; sysreg_save_host_state_vhe(host_ctxt);