From f12b54d7c24388886277598236b3eeea5c68eec4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 11 Apr 2025 11:54:22 +0100 Subject: [PATCH 1/7] KVM: arm64: Repaint pmcr_n into nr_pmu_counters The pmcr_n field obviously refers to PMCR_EL0.N, but is generally used as the number of counters seen by the guest. Rename it accordingly. Suggested-by: Oliver upton Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 4 ++-- arch/arm64/kvm/pmu-emul.c | 6 +++--- arch/arm64/kvm/sys_regs.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e98cfe7855a6..81a0dcf56d2b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -359,8 +359,8 @@ struct kvm_arch { cpumask_var_t supported_cpus; - /* PMCR_EL0.N value for the guest */ - u8 pmcr_n; + /* Maximum number of counters for the guest */ + u8 nr_pmu_counters; /* Iterator for idreg debugfs */ u8 idreg_debugfs_iter; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index a1bc10d7116a..60b5a5e4a6c5 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -280,7 +280,7 @@ static u64 kvm_pmu_hyp_counter_mask(struct kvm_vcpu *vcpu) return 0; hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); - n = vcpu->kvm->arch.pmcr_n; + n = vcpu->kvm->arch.nr_pmu_counters; /* * Programming HPMN to a value greater than PMCR_EL0.N is @@ -1032,7 +1032,7 @@ static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu) lockdep_assert_held(&kvm->arch.config_lock); kvm->arch.arm_pmu = arm_pmu; - kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm); + kvm->arch.nr_pmu_counters = kvm_arm_pmu_get_max_counters(kvm); } /** @@ -1261,7 +1261,7 @@ u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) { u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); - return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N); + return u64_replace_bits(pmcr, vcpu->kvm->arch.nr_pmu_counters, ARMV8_PMU_PMCR_N); } void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 005ad28f7306..aec7d9104cfe 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -785,7 +785,7 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 mask = BIT(ARMV8_PMU_CYCLE_IDX); - u8 n = vcpu->kvm->arch.pmcr_n; + u8 n = vcpu->kvm->arch.nr_pmu_counters; if (n) mask |= GENMASK(n - 1, 0); @@ -1217,7 +1217,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, */ if (!kvm_vm_has_ran_once(kvm) && new_n <= kvm_arm_pmu_get_max_counters(kvm)) - kvm->arch.pmcr_n = new_n; + kvm->arch.nr_pmu_counters = new_n; mutex_unlock(&kvm->arch.config_lock); From c8823e51b534d490ec27d372596eb35d2bb7193c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Feb 2025 10:17:20 +0000 Subject: [PATCH 2/7] KVM: arm64: Fix MDCR_EL2.HPMN reset value The MDCR_EL2 documentation indicates that the HPMN field has the following behaviour: "On a Warm reset, this field resets to the expression NUM_PMU_COUNTERS." However, it appears we reset it to zero, which is not very useful. Add a reset helper for MDCR_EL2, and handle the case where userspace changes the target PMU, which may force us to change HPMN again. Reported-by: Joey Gouly Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 20 +++++++++++++++++++- arch/arm64/kvm/sys_regs.c | 8 +++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 60b5a5e4a6c5..2df54508f5ae 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1027,12 +1027,30 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm) return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS); } +static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr) +{ + kvm->arch.nr_pmu_counters = nr; + + /* Reset MDCR_EL2.HPMN behind the vcpus' back... */ + if (test_bit(KVM_ARM_VCPU_HAS_EL2, kvm->arch.vcpu_features)) { + struct kvm_vcpu *vcpu; + unsigned long i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2); + val &= ~MDCR_EL2_HPMN; + val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters); + __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + } + } +} + static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu) { lockdep_assert_held(&kvm->arch.config_lock); kvm->arch.arm_pmu = arm_pmu; - kvm->arch.nr_pmu_counters = kvm_arm_pmu_get_max_counters(kvm); + kvm_arm_set_nr_counters(kvm, kvm_arm_pmu_get_max_counters(kvm)); } /** diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index aec7d9104cfe..1e4265172d9d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2698,6 +2698,12 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, .set_user = set_imp_id_reg, \ .reset = reset_imp_id_reg, \ .val = mask, \ + } + +static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters; + return vcpu->kvm->arch.nr_pmu_counters; } /* @@ -3243,7 +3249,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), EL2_REG_VNCR(HCR_EL2, reset_hcr, 0), - EL2_REG(MDCR_EL2, access_mdcr, reset_val, 0), + EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), EL2_REG_VNCR(HSTR_EL2, reset_val, 0), EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0), From 022435334393d56f4d6bc398cf16430067807b0a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Feb 2025 10:24:43 +0000 Subject: [PATCH 3/7] KVM: arm64: Contextualise the handling of PMCR_EL0.P writes Contrary to what the comment says in kvm_pmu_handle_pmcr(), writing PMCR_EL0.P==1 has the following effects: The event counters affected by this field are: * All event counters in the first range. * If any of the following are true, all event counters in the second range: - EL2 is disabled or not implemented in the current Security state. - The PE is executing at EL2 or EL3. where the "first range" represent the counters in the [0..HPMN-1] range, and the "second range" the counters in the [HPMN..MAX] range. It so appears that writing P from EL2 should nuke all counters, and not just the "guest" view. Just do that, and nuke the misleading comment. Reported-by: Joey Gouly Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2df54508f5ae..2336d9c8bd5e 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -608,14 +608,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { - /* - * Unlike other PMU sysregs, the controls in PMCR_EL0 always apply - * to the 'guest' range of counters and never the 'hyp' range. - */ unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu) & - ~kvm_pmu_hyp_counter_mask(vcpu) & ~BIT(ARMV8_PMU_CYCLE_IDX); + if (!vcpu_is_el2(vcpu)) + mask &= ~kvm_pmu_hyp_counter_mask(vcpu); + for_each_set_bit(i, &mask, 32) kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } From b7628c7973765c856866b3047c9002ae0825add6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Apr 2025 11:53:15 +0100 Subject: [PATCH 4/7] KVM: arm64: Allow userspace to limit the number of PMU counters for EL2 VMs As long as we had purely EL1 VMs, we could easily update the number of guest-visible counters by letting userspace write to PMCR_EL0.N. With VMs started at EL2, PMCR_EL1.N only reflects MDCR_EL2.HPMN, and we don't have a good way to limit it. For this purpose, introduce a new PMUv3 attribute that allows limiting the maximum number of counters. This requires the explicit selection of a PMU. Suggested-by: Oliver Upton Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/devices/vcpu.rst | 24 ++++++++++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 9 +++++---- arch/arm64/kvm/pmu-emul.c | 24 ++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 31a9576c07af..60bf205cb373 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -137,6 +137,30 @@ exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting hardare_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and the cpu field to the processor id. +1.5 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS +-------------------------------------------------- + +:Parameters: in kvm_device_attr.addr the address to an unsigned int + representing the maximum value taken by PMCR_EL0.N + +:Returns: + + ======= ==================================================== + -EBUSY PMUv3 already initialized, a VCPU has already run or + an event filter has already been set + -EFAULT Error accessing the value pointed to by addr + -ENODEV PMUv3 not supported or GIC not initialized + -EINVAL No PMUv3 explicitly selected, or value of N out of + range + ======= ==================================================== + +Set the number of implemented event counters in the virtual PMU. This +mandates that a PMU has explicitly been selected via +KVM_ARM_VCPU_PMU_V3_SET_PMU, and will fail when no PMU has been +explicitly selected, or the number of counters is out of range for the +selected PMU. Selecting a new PMU cancels the effect of setting this +attribute. + 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL ================================= diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index af9d9acaf997..ed5f3892674c 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -431,10 +431,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2336d9c8bd5e..efc59f343acc 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1104,6 +1104,20 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) return ret; } +static int kvm_arm_pmu_v3_set_nr_counters(struct kvm_vcpu *vcpu, unsigned int n) +{ + struct kvm *kvm = vcpu->kvm; + + if (!kvm->arch.arm_pmu) + return -EINVAL; + + if (n > kvm_arm_pmu_get_max_counters(kvm)) + return -EINVAL; + + kvm_arm_set_nr_counters(kvm, n); + return 0; +} + int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { struct kvm *kvm = vcpu->kvm; @@ -1200,6 +1214,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id); } + case KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS: { + unsigned int __user *uaddr = (unsigned int __user *)(long)attr->addr; + unsigned int n; + + if (get_user(n, uaddr)) + return -EFAULT; + + return kvm_arm_pmu_v3_set_nr_counters(vcpu, n); + } case KVM_ARM_VCPU_PMU_V3_INIT: return kvm_arm_pmu_v3_init(vcpu); } @@ -1238,6 +1261,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: case KVM_ARM_VCPU_PMU_V3_SET_PMU: + case KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS: if (kvm_vcpu_has_pmu(vcpu)) return 0; } From cd84a42c6703f69a865bd8e48ee8107eb80e0dea Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Apr 2025 14:24:34 +0100 Subject: [PATCH 5/7] KVM: arm64: Don't let userspace write to PMCR_EL0.N when the vcpu has EL2 Now that userspace can provide its limit for hte maximum number of counters, prevent it from writing to PMCR_EL0.N, as the value should be derived from MDCR_EL2.HPMN in that case. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1e4265172d9d..228a548f0a5b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1216,6 +1216,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, * with the existing KVM behavior. */ if (!kvm_vm_has_ran_once(kvm) && + !vcpu_has_nv(vcpu) && new_n <= kvm_arm_pmu_get_max_counters(kvm)) kvm->arch.nr_pmu_counters = new_n; From efff9dd2fee7a5969b5b2a04995e638c3ba15826 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Apr 2025 14:30:05 +0100 Subject: [PATCH 6/7] KVM: arm64: Handle out-of-bound write to MDCR_EL2.HPMN We don't really pay attention to what gets written to MDCR_EL2.HPMN, and funky guests could play ugly games on us. Restrict what gets written there, and limit the number of counters to what the PMU is allowed to have. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 228a548f0a5b..f3e603ef2a95 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2571,16 +2571,33 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 old = __vcpu_sys_reg(vcpu, MDCR_EL2); + u64 hpmn, val, old = __vcpu_sys_reg(vcpu, MDCR_EL2); - if (!access_rw(vcpu, p, r)) - return false; + if (!p->is_write) { + p->regval = old; + return true; + } + + val = p->regval; + hpmn = FIELD_GET(MDCR_EL2_HPMN, val); /* - * Request a reload of the PMU to enable/disable the counters affected - * by HPME. + * If HPMN is out of bounds, limit it to what we actually + * support. This matches the UNKNOWN definition of the field + * in that case, and keeps the emulation simple. Sort of. */ - if ((old ^ __vcpu_sys_reg(vcpu, MDCR_EL2)) & MDCR_EL2_HPME) + if (hpmn > vcpu->kvm->arch.nr_pmu_counters) { + hpmn = vcpu->kvm->arch.nr_pmu_counters; + u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); + } + + __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + + /* + * Request a reload of the PMU to enable/disable the counters + * affected by HPME. + */ + if ((old ^ val) & MDCR_EL2_HPME) kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return true; From 600f6fa5c90c05b4f2f60cde24fcea6a7239c41f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 11 Apr 2025 12:02:20 +0100 Subject: [PATCH 7/7] KVM: arm64: Let kvm_vcpu_read_pmcr() return an EL-dependent value for PMCR_EL0.N When EL2 is present, PMCR_EL0.N is the effective value of MDCR_EL2.HPMN when accessed from EL1 or EL0. Make sure we honor this requirement. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index efc59f343acc..25c29107f13f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1300,8 +1300,12 @@ u8 kvm_arm_pmu_get_pmuver_limit(void) u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) { u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); + u64 n = vcpu->kvm->arch.nr_pmu_counters; - return u64_replace_bits(pmcr, vcpu->kvm->arch.nr_pmu_counters, ARMV8_PMU_PMCR_N); + if (vcpu_has_nv(vcpu) && !vcpu_is_el2(vcpu)) + n = FIELD_GET(MDCR_EL2_HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); + + return u64_replace_bits(pmcr, n, ARMV8_PMU_PMCR_N); } void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu)