Merge branch kvm-arm64/spe-trbe-nvhe into kvmarm-master/next

* kvm-arm64/spe-trbe-nvhe:
  : .
  : Fix SPE and TRBE nVHE world switch which can otherwise result in
  : pretty bad behaviours, as they have the nasty habit of performing
  : out of context speculative page table walks.
  :
  : Patches courtesy of Will Deacon.
  : .
  KVM: arm64: Don't pass host_debug_state to BRBE world-switch routines
  KVM: arm64: Disable SPE Profiling Buffer when running in guest context
  KVM: arm64: Disable TRBE Trace Buffer Unit when running in guest context

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2026-04-08 12:23:58 +01:00
commit 73bb0bc2f4
3 changed files with 95 additions and 25 deletions

View File

@ -790,8 +790,10 @@ struct kvm_host_data {
struct kvm_guest_debug_arch regs;
/* Statistical profiling extension */
u64 pmscr_el1;
u64 pmblimitr_el1;
/* Self-hosted trace */
u64 trfcr_el1;
u64 trblimitr_el1;
/* Values of trap registers for the host before guest entry. */
u64 mdcr_el2;
u64 brbcr_el1;

View File

@ -14,20 +14,20 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
static void __debug_save_spe(u64 *pmscr_el1)
static void __debug_save_spe(void)
{
u64 reg;
u64 *pmscr_el1, *pmblimitr_el1;
/* Clear pmscr in case of early return */
*pmscr_el1 = 0;
pmscr_el1 = host_data_ptr(host_debug_state.pmscr_el1);
pmblimitr_el1 = host_data_ptr(host_debug_state.pmblimitr_el1);
/*
* At this point, we know that this CPU implements
* SPE and that it is available to the host.
* Check whether the host is actually using it.
*/
reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
if (!(reg & BIT(PMBLIMITR_EL1_E_SHIFT)))
*pmblimitr_el1 = read_sysreg_s(SYS_PMBLIMITR_EL1);
if (!(*pmblimitr_el1 & BIT(PMBLIMITR_EL1_E_SHIFT)))
return;
/* Yes; save the control register and disable data generation */
@ -37,18 +37,29 @@ static void __debug_save_spe(u64 *pmscr_el1)
/* Now drain all buffered data to memory */
psb_csync();
dsb(nsh);
/* And disable the profiling buffer */
write_sysreg_s(0, SYS_PMBLIMITR_EL1);
isb();
}
static void __debug_restore_spe(u64 pmscr_el1)
static void __debug_restore_spe(void)
{
if (!pmscr_el1)
u64 pmblimitr_el1 = *host_data_ptr(host_debug_state.pmblimitr_el1);
if (!(pmblimitr_el1 & BIT(PMBLIMITR_EL1_E_SHIFT)))
return;
/* The host page table is installed, but not yet synchronised */
isb();
/* Re-enable the profiling buffer. */
write_sysreg_s(pmblimitr_el1, SYS_PMBLIMITR_EL1);
isb();
/* Re-enable data generation */
write_sysreg_el1(pmscr_el1, SYS_PMSCR);
write_sysreg_el1(*host_data_ptr(host_debug_state.pmscr_el1), SYS_PMSCR);
}
static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
@ -57,12 +68,54 @@ static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
write_sysreg_el1(new_trfcr, SYS_TRFCR);
}
static bool __trace_needs_drain(void)
static void __trace_drain_and_disable(void)
{
if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE))
return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E;
u64 *trblimitr_el1 = host_data_ptr(host_debug_state.trblimitr_el1);
bool needs_drain = is_protected_kvm_enabled() ?
host_data_test_flag(HAS_TRBE) :
host_data_test_flag(TRBE_ENABLED);
return host_data_test_flag(TRBE_ENABLED);
if (!needs_drain) {
*trblimitr_el1 = 0;
return;
}
*trblimitr_el1 = read_sysreg_s(SYS_TRBLIMITR_EL1);
if (*trblimitr_el1 & TRBLIMITR_EL1_E) {
/*
* The host has enabled the Trace Buffer Unit so we have
* to beat the CPU with a stick until it stops accessing
* memory.
*/
/* First, ensure that our prior write to TRFCR has stuck. */
isb();
/* Now synchronise with the trace and drain the buffer. */
tsb_csync();
dsb(nsh);
/*
* With no more trace being generated, we can disable the
* Trace Buffer Unit.
*/
write_sysreg_s(0, SYS_TRBLIMITR_EL1);
if (cpus_have_final_cap(ARM64_WORKAROUND_2064142)) {
/*
* Some CPUs are so good, we have to drain 'em
* twice.
*/
tsb_csync();
dsb(nsh);
}
/*
* Ensure that the Trace Buffer Unit is disabled before
* we start mucking with the stage-2 and trap
* configuration.
*/
isb();
}
}
static bool __trace_needs_switch(void)
@ -79,21 +132,34 @@ static void __trace_switch_to_guest(void)
__trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
*host_data_ptr(trfcr_while_in_guest));
if (__trace_needs_drain()) {
isb();
tsb_csync();
}
__trace_drain_and_disable();
}
/*
 * Restore the host's trace configuration on the way out of the guest.
 *
 * Reads the TRBLIMITR_EL1 value stashed in host_debug_state. If its enable
 * bit (TRBLIMITR_EL1_E) is set, that value is written back to the register
 * before TRFCR_EL1 is touched. When the stashed value has the enable bit
 * clear, the TRBLIMITR_EL1 write is skipped entirely and only the TRFCR_EL1
 * switch is performed.
 *
 * The statement order (TRBLIMITR_EL1 write, barrier(s), then TRFCR_EL1) is
 * deliberate; do not reorder.
 */
static void __trace_switch_to_host(void)
{
/* Value recorded in host_debug_state; zero or E-clear means nothing to re-enable. */
u64 trblimitr_el1 = *host_data_ptr(host_debug_state.trblimitr_el1);
if (trblimitr_el1 & TRBLIMITR_EL1_E) {
/* Re-enable the Trace Buffer Unit for the host. */
write_sysreg_s(trblimitr_el1, SYS_TRBLIMITR_EL1);
/* Synchronise the TRBLIMITR_EL1 write before continuing. */
isb();
/*
 * NOTE(review): the second barrier is only issued on CPUs flagged
 * with ARM64_WORKAROUND_2038923; the exact erratum requirement is
 * not visible here -- confirm against the erratum notice.
 */
if (cpus_have_final_cap(ARM64_WORKAROUND_2038923)) {
/*
* Make sure the unit is re-enabled before we
* poke TRFCR.
*/
isb();
}
}
/*
 * Install the host's saved TRFCR_EL1 value. The first argument is the
 * helper's 'saved_trfcr' slot (here trfcr_while_in_guest); presumably
 * the outgoing register value is stored there -- verify in
 * __trace_do_switch().
 */
__trace_do_switch(host_data_ptr(trfcr_while_in_guest),
*host_data_ptr(host_debug_state.trfcr_el1));
}
static void __debug_save_brbe(u64 *brbcr_el1)
static void __debug_save_brbe(void)
{
u64 *brbcr_el1 = host_data_ptr(host_debug_state.brbcr_el1);
*brbcr_el1 = 0;
/* Check if the BRBE is enabled */
@ -109,8 +175,10 @@ static void __debug_save_brbe(u64 *brbcr_el1)
write_sysreg_el1(0, SYS_BRBCR);
}
static void __debug_restore_brbe(u64 brbcr_el1)
static void __debug_restore_brbe(void)
{
u64 brbcr_el1 = *host_data_ptr(host_debug_state.brbcr_el1);
if (!brbcr_el1)
return;
@ -122,11 +190,11 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
if (host_data_test_flag(HAS_SPE))
__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
__debug_save_spe();
/* Disable BRBE branch records */
if (host_data_test_flag(HAS_BRBE))
__debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1));
__debug_save_brbe();
if (__trace_needs_switch())
__trace_switch_to_guest();
@ -140,9 +208,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
if (host_data_test_flag(HAS_SPE))
__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
__debug_restore_spe();
if (host_data_test_flag(HAS_BRBE))
__debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1));
__debug_restore_brbe();
if (__trace_needs_switch())
__trace_switch_to_host();
}

View File

@ -293,7 +293,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
* We're about to restore some new MMU state. Make sure
* ongoing page-table walks that have started before we
* trapped to EL2 have completed. This also synchronises the
* above disabling of BRBE, SPE and TRBE.
* above disabling of BRBE.
*
* See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
* rule R_LFHQG and subsequent information statements.