From d017eeabd5092565c3dd1c8a7b00ba724c33c18f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:17 +0000 Subject: [PATCH 01/20] arm64: Add ID_DFR0_EL1.PerfMon values for PMUv3p7 and IMP_DEF Align the ID_DFR0_EL1.PerfMon values with ID_AA64DFR0_EL1.PMUver. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-2-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7d301700d1a9..84f59ce1dc6d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -698,6 +698,8 @@ #define ID_DFR0_PERFMON_8_1 0x4 #define ID_DFR0_PERFMON_8_4 0x5 #define ID_DFR0_PERFMON_8_5 0x6 +#define ID_DFR0_PERFMON_8_7 0x7 +#define ID_DFR0_PERFMON_IMP_DEF 0xf #define ID_ISAR4_SWP_FRAC_SHIFT 28 #define ID_ISAR4_PSR_M_SHIFT 24 From bead02204e9806807bb290137b1ccabfcb4b16fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:18 +0000 Subject: [PATCH 02/20] KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode Ricardo recently pointed out that the PMU chained counter emulation in KVM wasn't quite behaving like the one on actual hardware, in the sense that a chained counter would expose an overflow on both halves of a chained counter, while KVM would only expose the overflow on the top half. The difference is subtle, but significant. What does the architecture say (DDI0087 H.a): - Up to PMUv3p4, all counters but the cycle counter are 32bit - A 32bit counter that overflows generates a CHAIN event on the adjacent counter after exposing its own overflow status - The CHAIN event is accounted if the counter is correctly configured (CHAIN event selected and counter enabled) This all means that our current implementation (which uses 64bit perf events) prevents us from emulating this overflow on the lower half. How to fix this? By implementing the above, to the letter. This largely results in code deletion, removing the notions of "counter pair", "chained counters", and "canonical counter". The code is further restructured to make the CHAIN handling similar to SWINC, as the two are now extremely similar in behaviour. Reported-by: Ricardo Koller Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-3-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 322 ++++++++++---------------------------- include/kvm/arm_pmu.h | 2 - 2 files changed, 87 insertions(+), 237 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 0003c7d37533..57765be69bea 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -15,16 +15,14 @@ #include #include +#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) + DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); - -#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 static u32 kvm_pmu_event_mask(struct kvm *kvm) { @@ -57,6 +55,11 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); } +static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) +{ + return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX); +} + static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) { struct kvm_pmu *pmu; @@ -69,91 +72,22 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) } /** - * kvm_pmu_pmc_is_chained - determine if the pmc is chained - * @pmc: The PMU counter pointer - */ -static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) -{ - struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); - - return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); -} - -/** - * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter - * @select_idx: The counter index - */ -static bool kvm_pmu_idx_is_high_counter(u64 select_idx) -{ - return select_idx & 0x1; -} - -/** - * kvm_pmu_get_canonical_pmc - obtain the canonical pmc - * @pmc: The PMU counter pointer - * - * When a pair of PMCs are chained together we use the low counter (canonical) - * to hold the underlying perf event. - */ -static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) -{ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - - return pmc; -} -static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) -{ - if (kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - else - return pmc + 1; -} - -/** - * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain + * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer * @select_idx: The counter index */ -static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) { - u64 eventsel, reg; + u64 counter, reg, enabled, running; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - select_idx |= 0x1; + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; - if (select_idx == ARMV8_PMU_CYCLE_IDX) - return false; - - reg = PMEVTYPER0_EL0 + select_idx; - eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm); - - return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; -} - -/** - * kvm_pmu_get_pair_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @pmc: The PMU counter pointer - */ -static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, - struct kvm_pmc *pmc) -{ - u64 counter, counter_high, reg, enabled, running; - - if (kvm_pmu_pmc_is_chained(pmc)) { - pmc = kvm_pmu_get_canonical_pmc(pmc); - reg = PMEVCNTR0_EL0 + pmc->idx; - - counter = __vcpu_sys_reg(vcpu, reg); - counter_high = __vcpu_sys_reg(vcpu, reg + 1); - - counter = lower_32_bits(counter) | (counter_high << 32); - } else { - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - counter = __vcpu_sys_reg(vcpu, reg); - } + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + counter = __vcpu_sys_reg(vcpu, reg); /* * The real counter value is equal to the value of counter register plus @@ -163,29 +97,7 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - return counter; -} - -/** - * kvm_pmu_get_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) -{ - u64 counter; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - - if (!kvm_vcpu_has_pmu(vcpu)) - return 0; - - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(select_idx)) - counter = upper_32_bits(counter); - else if (select_idx != ARMV8_PMU_CYCLE_IDX) + if (select_idx != ARMV8_PMU_CYCLE_IDX) counter = lower_32_bits(counter); return counter; @@ -218,7 +130,6 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) */ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) { - pmc = kvm_pmu_get_canonical_pmc(pmc); if (pmc->perf_event) { perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); @@ -236,11 +147,10 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { u64 counter, reg, val; - pmc = kvm_pmu_get_canonical_pmc(pmc); if (!pmc->perf_event) return; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { reg = PMCCNTR_EL0; @@ -252,9 +162,6 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) __vcpu_sys_reg(vcpu, reg) = val; - if (kvm_pmu_pmc_is_chained(pmc)) - __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); - kvm_pmu_release_perf_event(pmc); } @@ -285,8 +192,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) for_each_set_bit(i, &mask, 32) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); - - bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); } /** @@ -340,12 +245,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); - - /* At this point, pmc must be the canonical */ - if (pmc->perf_event) { + if (!pmc->perf_event) { + kvm_pmu_create_perf_event(vcpu, i); + } else { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) kvm_debug("fail to enable perf event\n"); @@ -375,11 +277,6 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); - - /* At this point, pmc must be the canonical */ if (pmc->perf_event) perf_event_disable(pmc->perf_event); } @@ -484,6 +381,48 @@ static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) kvm_vcpu_kick(vcpu); } +/* + * Perform an increment on any of the counters described in @mask, + * generating the overflow if required, and propagate it as a chained + * event if possible. + */ +static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, + unsigned long mask, u32 event) +{ + int i; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + return; + + /* Weed out disabled counters */ + mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + + for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { + u64 type, reg; + + /* Filter on event type */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= kvm_pmu_event_mask(vcpu->kvm); + if (type != event) + continue; + + /* Increment this counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* No overflow? move on */ + continue; + + /* Mark overflow */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + + if (kvm_pmu_counter_can_chain(vcpu, i)) + kvm_pmu_counter_increment(vcpu, BIT(i + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + } +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -514,6 +453,10 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + if (kvm_pmu_counter_can_chain(vcpu, idx)) + kvm_pmu_counter_increment(vcpu, BIT(idx + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -533,50 +476,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - int i; - - if (!kvm_vcpu_has_pmu(vcpu)) - return; - - if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) - return; - - /* Weed out disabled counters */ - val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { - u64 type, reg; - - if (!(val & BIT(i))) - continue; - - /* PMSWINC only applies to ... SW_INC! */ - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); - type &= kvm_pmu_event_mask(vcpu->kvm); - if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) - continue; - - /* increment this even SW_INC counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - - if (reg) /* no overflow on the low part */ - continue; - - if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { - /* increment the high counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; - if (!reg) /* mark overflow on the high counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); - } else { - /* mark overflow on low counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); - } - } + kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR); } /** @@ -625,18 +525,11 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) { struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; struct perf_event *event; struct perf_event_attr attr; u64 eventsel, counter, reg, data; - /* - * For chained counters the event type and filtering attributes are - * obtained from the low/even counter. We also use this counter to - * determine if the event is enabled/disabled. - */ - pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; data = __vcpu_sys_reg(vcpu, reg); @@ -647,8 +540,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) else eventsel = data & kvm_pmu_event_mask(vcpu->kvm); - /* Software increment event doesn't need to be backed by a perf event */ - if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR) + /* + * Neither SW increment nor chained events need to be backed + * by a perf event. + */ + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR || + eventsel == ARMV8_PMUV3_PERFCTR_CHAIN) return; /* @@ -670,31 +567,22 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + counter = kvm_pmu_get_counter_value(vcpu, select_idx); - if (kvm_pmu_pmc_is_chained(pmc)) { - /** - * The initial sample period (overflow count) of an event. For - * chained counters we only support overflow interrupts on the - * high counter. - */ + /* + * If counting with a 64bit counter, advertise it to the perf + * code, carefully dealing with the initial sample period. + */ + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; attr.sample_period = (-counter) & GENMASK(63, 0); - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; - - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, - pmc + 1); } else { - /* The initial sample period (overflow count) of an event. */ - if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - attr.sample_period = (-counter) & GENMASK(63, 0); - else - attr.sample_period = (-counter) & GENMASK(31, 0); - - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, pmc); + attr.sample_period = (-counter) & GENMASK(31, 0); } + event = perf_event_create_kernel_counter(&attr, -1, current, + kvm_pmu_perf_overflow, pmc); + if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", PTR_ERR(event)); @@ -704,41 +592,6 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) pmc->perf_event = event; } -/** - * kvm_pmu_update_pmc_chained - update chained bitmap - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter - * - * Update the chained bitmap based on the event type written in the - * typer register and the enable state of the odd register. - */ -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; - bool new_state, old_state; - - old_state = kvm_pmu_pmc_is_chained(pmc); - new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && - kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); - - if (old_state == new_state) - return; - - canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); - kvm_pmu_stop_counter(vcpu, canonical_pmc); - if (new_state) { - /* - * During promotion from !chained to chained we must ensure - * the adjacent counter is stopped and its event destroyed - */ - kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); - set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); - return; - } - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); -} - /** * kvm_pmu_set_counter_event_type - set selected counter to monitor some event * @vcpu: The vcpu pointer @@ -766,7 +619,6 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, __vcpu_sys_reg(vcpu, reg) = data & mask; - kvm_pmu_update_pmc_chained(vcpu, select_idx); kvm_pmu_create_perf_event(vcpu, select_idx); } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index c0b868ce6a8f..96b192139a23 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -11,7 +11,6 @@ #include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) -#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) #ifdef CONFIG_HW_PERF_EVENTS @@ -29,7 +28,6 @@ struct kvm_pmu { struct irq_work overflow_work; struct kvm_pmu_events events; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; - DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); int irq_num; bool created; bool irq_level; From acdd8a4e13a008a83c6da88bb53eecbecda9714c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:19 +0000 Subject: [PATCH 03/20] KVM: arm64: PMU: Always advertise the CHAIN event Even when the underlying HW doesn't offer the CHAIN event (which happens with QEMU), we can always support it as we're in control of the counter overflow. Always advertise the event via PMCEID0_EL0. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-4-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 57765be69bea..69b67ab3c4bf 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -701,6 +701,8 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) if (!pmceid1) { val = read_sysreg(pmceid0_el0); + /* always support CHAIN */ + val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN); base = 0; } else { val = read_sysreg(pmceid1_el0); From c82d28cbf1d4f9fe174041b4485c635cb970afa7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:20 +0000 Subject: [PATCH 04/20] KVM: arm64: PMU: Distinguish between 64bit counter and 64bit overflow The PMU architecture makes a subtle difference between a 64bit counter and a counter that has a 64bit overflow. This is for example the case of the cycle counter, which can generate an overflow on a 32bit boundary if PMCR_EL0.LC==0 despite the accumulation being done on 64 bits. Use this distinction in the few cases where it matters in the code, as we will reuse this with PMUv3p5 long counters. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-5-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 43 ++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 69b67ab3c4bf..d050143326b5 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -50,6 +50,11 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) * @select_idx: The counter index */ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (select_idx == ARMV8_PMU_CYCLE_IDX); +} + +static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) { return (select_idx == ARMV8_PMU_CYCLE_IDX && __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); @@ -57,7 +62,8 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) { - return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX); + return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX && + !kvm_pmu_idx_has_64bit_overflow(vcpu, idx)); } static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) @@ -97,7 +103,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - if (select_idx != ARMV8_PMU_CYCLE_IDX) + if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) counter = lower_32_bits(counter); return counter; @@ -423,6 +429,23 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, } } +/* Compute the sample period for a given counter value */ +static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter) +{ + u64 val; + + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) + val = -(counter & GENMASK(31, 0)); + else + val = (-counter) & GENMASK(63, 0); + } else { + val = (-counter) & GENMASK(31, 0); + } + + return val; +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -442,10 +465,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, * Reset the sample period to the architectural limit, * i.e. the point where the counter overflows. */ - period = -(local64_read(&perf_event->count)); - - if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - period &= GENMASK(31, 0); + period = compute_period(vcpu, idx, local64_read(&perf_event->count)); local64_set(&perf_event->hw.period_left, 0); perf_event->attr.sample_period = period; @@ -571,14 +591,13 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) /* * If counting with a 64bit counter, advertise it to the perf - * code, carefully dealing with the initial sample period. + * code, carefully dealing with the initial sample period + * which also depends on the overflow. */ - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { + if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; - attr.sample_period = (-counter) & GENMASK(63, 0); - } else { - attr.sample_period = (-counter) & GENMASK(31, 0); - } + + attr.sample_period = compute_period(vcpu, select_idx, counter); event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); From 001d85bd6c039d3662a4f33a5d212ef3e0438b27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:21 +0000 Subject: [PATCH 05/20] KVM: arm64: PMU: Narrow the overflow checking when required For 64bit counters that overflow on a 32bit boundary, make sure we only check the bottom 32bit to generate a CHAIN event. Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-6-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index d050143326b5..9e6bc7edc4de 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -417,7 +417,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (reg) /* No overflow? move on */ + /* No overflow? move on */ + if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ From 0f1e172b54f7574ca6aa46b851b332896add955f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:22 +0000 Subject: [PATCH 06/20] KVM: arm64: PMU: Only narrow counters that are not 64bit wide The current PMU emulation sometimes narrows counters to 32bit if the counter isn't the cycle counter. As this is going to change with PMUv3p5 where the counters are all 64bit, fix the couple of cases where this happens unconditionally. Signed-off-by: Marc Zyngier Reviewed-by: Reiji Watanabe Link: https://lore.kernel.org/r/20221113163832.3154370-7-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 9e6bc7edc4de..1fab889dbc74 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -151,20 +151,17 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) */ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) { - u64 counter, reg, val; + u64 reg, val; if (!pmc->perf_event) return; - counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); + val = kvm_pmu_get_counter_value(vcpu, pmc->idx); - if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) reg = PMCCNTR_EL0; - val = counter; - } else { + else reg = PMEVCNTR0_EL0 + pmc->idx; - val = lower_32_bits(counter); - } __vcpu_sys_reg(vcpu, reg) = val; @@ -414,7 +411,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, /* Increment this counter */ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); + if (!kvm_pmu_idx_is_64bit(vcpu, i)) + reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; /* No overflow? move on */ From 0cb9c3c87a9d3287eaf353936e6846d885102439 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:23 +0000 Subject: [PATCH 07/20] KVM: arm64: PMU: Add counter_index_to_*reg() helpers In order to reduce the boilerplate code, add two helpers returning the counter register index (resp. the event register) in the vcpu register file from the counter index. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-8-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 1fab889dbc74..faab0f57a45d 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -77,6 +77,16 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) return container_of(vcpu_arch, struct kvm_vcpu, arch); } +static u32 counter_index_to_reg(u64 idx) +{ + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx; +} + +static u32 counter_index_to_evtreg(u64 idx) +{ + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; +} + /** * kvm_pmu_get_counter_value - get PMU counter value * @vcpu: The vcpu pointer @@ -91,8 +101,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) if (!kvm_vcpu_has_pmu(vcpu)) return 0; - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + reg = counter_index_to_reg(select_idx); counter = __vcpu_sys_reg(vcpu, reg); /* @@ -122,8 +131,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; + reg = counter_index_to_reg(select_idx); __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); /* Recreate the perf event to reflect the updated sample_period */ @@ -158,10 +166,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) val = kvm_pmu_get_counter_value(vcpu, pmc->idx); - if (pmc->idx == ARMV8_PMU_CYCLE_IDX) - reg = PMCCNTR_EL0; - else - reg = PMEVCNTR0_EL0 + pmc->idx; + reg = counter_index_to_reg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = val; @@ -404,16 +409,16 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, u64 type, reg; /* Filter on event type */ - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i)); type &= kvm_pmu_event_mask(vcpu->kvm); if (type != event) continue; /* Increment this counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; if (!kvm_pmu_idx_is_64bit(vcpu, i)) reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; /* No overflow? move on */ if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) @@ -549,8 +554,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) struct perf_event_attr attr; u64 eventsel, counter, reg, data; - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; + reg = counter_index_to_evtreg(select_idx); data = __vcpu_sys_reg(vcpu, reg); kvm_pmu_stop_counter(vcpu, pmc); @@ -632,8 +636,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + reg = counter_index_to_evtreg(select_idx); __vcpu_sys_reg(vcpu, reg) = data & mask; From 9917264d74d9063341968a8e071266358496777b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:24 +0000 Subject: [PATCH 08/20] KVM: arm64: PMU: Simplify setting a counter to a specific value kvm_pmu_set_counter_value() is pretty odd, as it tries to update the counter value while taking into account the value that is currently held by the running perf counter. This is not only complicated, this is quite wrong. Nowhere in the architecture is it said that the counter would be offset by something that is pending. The counter should be updated with the value set by SW, and start counting from there if required. Remove the odd computation and just assign the provided value after having released the perf event (which is then restarted). Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-9-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index faab0f57a45d..ea0c8411641f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -23,6 +23,7 @@ static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); static u32 kvm_pmu_event_mask(struct kvm *kvm) { @@ -131,8 +132,10 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; + kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); + reg = counter_index_to_reg(select_idx); - __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(vcpu, select_idx); From 26d2d0594d7016dbcbce4038aa202c2858d5a944 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:25 +0000 Subject: [PATCH 09/20] KVM: arm64: PMU: Do not let AArch32 change the counters' top 32 bits Even when using PMUv3p5 (which implies 64bit counters), there is no way for AArch32 to write to the top 32 bits of the counters. The only way to influence these bits (other than by counting events) is by writing PMCR.P==1. Make sure we obey the architecture and preserve the top 32 bits on a counter update. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-10-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index ea0c8411641f..7a945fa6dd03 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -119,13 +119,8 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) return counter; } -/** - * kvm_pmu_set_counter_value - set PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - * @val: The counter value - */ -void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, + bool force) { u64 reg; @@ -135,12 +130,36 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); reg = counter_index_to_reg(select_idx); + + if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX && + !force) { + /* + * Even with PMUv3p5, AArch32 cannot write to the top + * 32bit of the counters. The only possible course of + * action is to use PMCR.P, which will reset them to + * 0 (the only use of the 'force' parameter). + */ + val = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32); + val |= lower_32_bits(val); + } + __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(vcpu, select_idx); } +/** + * kvm_pmu_set_counter_value - set PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + * @val: The counter value + */ +void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +{ + kvm_pmu_set_counter(vcpu, select_idx, val, false); +} + /** * kvm_pmu_release_perf_event - remove the perf event * @pmc: The PMU counter pointer @@ -533,7 +552,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) - kvm_pmu_set_counter_value(vcpu, i, 0); + kvm_pmu_set_counter(vcpu, i, 0, true); } } From 3d0dba5764b94308b8c4257ad64e383f11ce0c92 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:26 +0000 Subject: [PATCH 10/20] KVM: arm64: PMU: Move the ID_AA64DFR0_EL1.PMUver limit to VM creation As further patches will enable the selection of a PMU revision from userspace, sample the supported PMU revision at VM creation time, rather than building each time the ID_AA64DFR0_EL1 register is accessed. This shouldn't result in any change in behaviour. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-11-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kvm/arm.c | 6 ++++++ arch/arm64/kvm/pmu-emul.c | 11 ++++++++++ arch/arm64/kvm/sys_regs.c | 36 ++++++++++++++++++++++++------- include/kvm/arm_pmu.h | 6 ++++++ 5 files changed, 55 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 45e2136322ba..cc44e3bc528d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -163,6 +163,10 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + struct { + u8 imp:4; + u8 unimp:4; + } dfr0_pmuver; /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10..f956aab438c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -164,6 +164,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_default_spectre(kvm); kvm_arm_init_hypercalls(kvm); + /* + * Initialise the default PMUver before there is a chance to + * create an actual PMU. + */ + kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit(); + return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7a945fa6dd03..94ca2d17a4e4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1047,3 +1047,14 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } + +u8 kvm_arm_pmu_get_pmuver_limit(void) +{ + u64 tmp; + + tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); + tmp = cpuid_feature_cap_perfmon_field(tmp, + ID_AA64DFR0_EL1_PMUVer_SHIFT, + ID_AA64DFR0_EL1_PMUVer_V3P4); + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f4a7c5abcbca..297b4fcbf969 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1062,6 +1062,27 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_has_pmu(vcpu)) + return vcpu->kvm->arch.dfr0_pmuver.imp; + + return vcpu->kvm->arch.dfr0_pmuver.unimp; +} + +static u8 pmuver_to_perfmon(u8 pmuver) +{ + switch (pmuver) { + case ID_AA64DFR0_EL1_PMUVer_IMP: + return ID_DFR0_PERFMON_8_0; + case ID_AA64DFR0_EL1_PMUVer_IMP_DEF: + return ID_DFR0_PERFMON_IMP_DEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. */ + return pmuver; + } +} + /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { @@ -1111,18 +1132,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_AA64DFR0_EL1_PMUVer_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0); + /* Set PMUver to the required version */ + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), + vcpu_pmuver(vcpu)); /* Hide SPE from guests */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); break; case SYS_ID_DFR0_EL1: - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_DFR0_PERFMON_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_PERFMON), + pmuver_to_perfmon(vcpu_pmuver(vcpu))); break; } diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 96b192139a23..812f729c9108 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -89,6 +89,8 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) +u8 kvm_arm_pmu_get_pmuver_limit(void); + #else struct kvm_pmu { }; @@ -154,6 +156,10 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} +static inline u8 kvm_arm_pmu_get_pmuver_limit(void) +{ + return 0; +} #endif From 60e651ff1f48bfdf8fec80d35510bd89ecf8c766 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:27 +0000 Subject: [PATCH 11/20] KVM: arm64: PMU: Allow ID_AA64DFR0_EL1.PMUver to be set from userspace Allow userspace to write ID_AA64DFR0_EL1, on the condition that only the PMUver field can be altered and be at most the one that was initially computed for the guest. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-12-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 297b4fcbf969..49585258ae6c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1242,6 +1242,45 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, return 0; } +static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 pmuver, host_pmuver; + bool valid_pmu; + + host_pmuver = kvm_arm_pmu_get_pmuver_limit(); + + /* + * Allow AA64DFR0_EL1.PMUver to be set from userspace as long + * as it doesn't promise more than what the HW gives us. We + * allow an IMPDEF PMU though, only if no PMU is supported + * (KVM backward compatibility handling). + */ + pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), val); + if ((pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver > host_pmuver)) + return -EINVAL; + + valid_pmu = (pmuver != 0 && pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PMUver, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = pmuver; + else + vcpu->kvm->arch.dfr0_pmuver.unimp = pmuver; + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1503,7 +1542,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(4,7), /* CRm=5 */ - ID_SANITISED(ID_AA64DFR0_EL1), + { SYS_DESC(SYS_ID_AA64DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, }, ID_SANITISED(ID_AA64DFR1_EL1), ID_UNALLOCATED(5,2), ID_UNALLOCATED(5,3), From d82e0dfdfda73f91e7282e1083a2cd7cd366ea87 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:28 +0000 Subject: [PATCH 12/20] KVM: arm64: PMU: Allow ID_DFR0_EL1.PerfMon to be set from userspace Allow userspace to write ID_DFR0_EL1, on the condition that only the PerfMon field can be altered and be something that is compatible with what was computed for the AArch64 view of the guest. Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-13-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 57 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 49585258ae6c..b8ac58723459 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1070,6 +1070,19 @@ static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) return vcpu->kvm->arch.dfr0_pmuver.unimp; } +static u8 perfmon_to_pmuver(u8 perfmon) +{ + switch (perfmon) { + case ID_DFR0_PERFMON_8_0: + return ID_AA64DFR0_EL1_PMUVer_IMP; + case ID_DFR0_PERFMON_IMP_DEF: + return ID_AA64DFR0_EL1_PMUVer_IMP_DEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. */ + return perfmon; + } +} + static u8 pmuver_to_perfmon(u8 pmuver) { switch (pmuver) { @@ -1281,6 +1294,46 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, return 0; } +static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 perfmon, host_perfmon; + bool valid_pmu; + + host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); + + /* + * Allow DFR0_EL1.PerfMon to be set from userspace as long as + * it doesn't promise more than what the HW gives us on the + * AArch64 side (as everything is emulated with that), and + * that this is a PMUv3. + */ + perfmon = FIELD_GET(ARM64_FEATURE_MASK(ID_DFR0_PERFMON), val); + if ((perfmon != ID_DFR0_PERFMON_IMP_DEF && perfmon > host_perfmon) || + (perfmon != 0 && perfmon < ID_DFR0_PERFMON_8_0)) + return -EINVAL; + + valid_pmu = (perfmon != 0 && perfmon != ID_DFR0_PERFMON_IMP_DEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PerfMon, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = perfmon_to_pmuver(perfmon); + else + vcpu->kvm->arch.dfr0_pmuver.unimp = perfmon_to_pmuver(perfmon); + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1502,7 +1555,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=1 */ AA32_ID_SANITISED(ID_PFR0_EL1), AA32_ID_SANITISED(ID_PFR1_EL1), - AA32_ID_SANITISED(ID_DFR0_EL1), + { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_dfr0_el1, + .visibility = aa32_id_visibility, }, ID_HIDDEN(ID_AFR0_EL1), AA32_ID_SANITISED(ID_MMFR0_EL1), AA32_ID_SANITISED(ID_MMFR1_EL1), From 11af4c37165e36a6090172ded5d06acdf15206da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:29 +0000 Subject: [PATCH 13/20] KVM: arm64: PMU: Implement PMUv3p5 long counter support PMUv3p5 (which is mandatory with ARMv8.5) comes with some extra features: - All counters are 64bit - The overflow point is controlled by the PMCR_EL0.LP bit Add the required checks in the helpers that control counter width and overflow, as well as the sysreg handling for the LP bit. A new kvm_pmu_is_3p5() helper makes it easy to spot the PMUv3p5 specific handling. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-14-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 8 +++++--- arch/arm64/kvm/sys_regs.c | 4 ++++ include/kvm/arm_pmu.h | 7 +++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 94ca2d17a4e4..7e25ff73cbba 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -52,13 +52,15 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) */ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) { - return (select_idx == ARMV8_PMU_CYCLE_IDX); + return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu)); } static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) { - return (select_idx == ARMV8_PMU_CYCLE_IDX && - __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0); + + return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || + (select_idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); } static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b8ac58723459..67eac0f747be 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -654,6 +654,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -703,6 +705,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 812f729c9108..628775334d5e 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -89,6 +89,12 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) +/* + * Evaluates as true when emulating PMUv3p5, and false otherwise. + */ +#define kvm_pmu_is_3p5(vcpu) \ + (vcpu->kvm->arch.dfr0_pmuver.imp >= ID_AA64DFR0_EL1_PMUVer_V3P5) + u8 kvm_arm_pmu_get_pmuver_limit(void); #else @@ -153,6 +159,7 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) } #define kvm_vcpu_has_pmu(vcpu) ({ false; }) +#define kvm_pmu_is_3p5(vcpu) ({ false; }) static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} From 1f7c978282855d6b2abd608064004c74902e791d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:30 +0000 Subject: [PATCH 14/20] KVM: arm64: PMU: Allow PMUv3p5 to be exposed to the guest Now that the infrastructure is in place, bump the PMU support up to PMUv3p5. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-15-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7e25ff73cbba..be881ae67133 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -1057,6 +1057,6 @@ u8 kvm_arm_pmu_get_pmuver_limit(void) tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); tmp = cpuid_feature_cap_perfmon_field(tmp, ID_AA64DFR0_EL1_PMUVer_SHIFT, - ID_AA64DFR0_EL1_PMUVer_V3P4); + ID_AA64DFR0_EL1_PMUVer_V3P5); return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); } From 9bad925dd741408825590eccc495d073cc246de0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:31 +0000 Subject: [PATCH 15/20] KVM: arm64: PMU: Simplify vcpu computation on perf overflow notification The way we compute the target vcpu on getting an overflow is a bit odd, as we use the PMC array as an anchor for kvm_pmc_to_vcpu, while we could directly compute the correct address. Get rid of the intermediate step and directly compute the target vcpu. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-16-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index be881ae67133..49a004660497 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -405,11 +405,8 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) { struct kvm_vcpu *vcpu; - struct kvm_pmu *pmu; - - pmu = container_of(work, struct kvm_pmu, overflow_work); - vcpu = kvm_pmc_to_vcpu(pmu->pmc); + vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work); kvm_vcpu_kick(vcpu); } From d56bdce586e7fabd2b3339f476e0e4c059b24e19 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Nov 2022 16:38:32 +0000 Subject: [PATCH 16/20] KVM: arm64: PMU: Make kvm_pmc the main data structure The PMU code has historically been torn between referencing a counter as a pair vcpu+index or as the PMC pointer. Given that it is pretty easy to go from one representation to the other, standardise on the latter which, IMHO, makes the code slightly more readable. YMMV. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221113163832.3154370-17-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 174 +++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 87 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 49a004660497..3295dea34f4c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -22,9 +22,19 @@ DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc); static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); +static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc) +{ + return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]); +} + +static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx) +{ + return &vcpu->arch.pmu.pmc[cnt_idx]; +} + static u32 kvm_pmu_event_mask(struct kvm *kvm) { unsigned int pmuver; @@ -46,38 +56,27 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) } /** - * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter - * @vcpu: The vcpu pointer - * @select_idx: The counter index + * kvm_pmc_is_64bit - determine if counter is 64bit + * @pmc: counter context */ -static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc) { - return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu)); + return (pmc->idx == ARMV8_PMU_CYCLE_IDX || + kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc))); } -static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc) { - u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0); + u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0); - return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || - (select_idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); + return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || + (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); } -static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx) +static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc) { - return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX && - !kvm_pmu_idx_has_64bit_overflow(vcpu, idx)); -} - -static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu; - struct kvm_vcpu_arch *vcpu_arch; - - pmc -= pmc->idx; - pmu = container_of(pmc, struct kvm_pmu, pmc[0]); - vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); - return container_of(vcpu_arch, struct kvm_vcpu, arch); + return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX && + !kvm_pmc_has_64bit_overflow(pmc)); } static u32 counter_index_to_reg(u64 idx) @@ -90,21 +89,12 @@ static u32 counter_index_to_evtreg(u64 idx) return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; } -/** - * kvm_pmu_get_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 counter, reg, enabled, running; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - if (!kvm_vcpu_has_pmu(vcpu)) - return 0; - - reg = counter_index_to_reg(select_idx); + reg = counter_index_to_reg(pmc->idx); counter = __vcpu_sys_reg(vcpu, reg); /* @@ -115,25 +105,35 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); - if (!kvm_pmu_idx_is_64bit(vcpu, select_idx)) + if (!kvm_pmc_is_64bit(pmc)) counter = lower_32_bits(counter); return counter; } -static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, - bool force) +/** + * kvm_pmu_get_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) { + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + + return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); +} + +static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) +{ + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 reg; - if (!kvm_vcpu_has_pmu(vcpu)) - return; + kvm_pmu_release_perf_event(pmc); - kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]); + reg = counter_index_to_reg(pmc->idx); - reg = counter_index_to_reg(select_idx); - - if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX && + if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX && !force) { /* * Even with PMUv3p5, AArch32 cannot write to the top @@ -148,7 +148,7 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, __vcpu_sys_reg(vcpu, reg) = val; /* Recreate the perf event to reflect the updated sample_period */ - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_create_perf_event(pmc); } /** @@ -159,7 +159,10 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val, */ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { - kvm_pmu_set_counter(vcpu, select_idx, val, false); + if (!kvm_vcpu_has_pmu(vcpu)) + return; + + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false); } /** @@ -181,14 +184,15 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) * * If this counter has been configured to monitor some event, release it here. */ -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) +static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); u64 reg, val; if (!pmc->perf_event) return; - val = kvm_pmu_get_counter_value(vcpu, pmc->idx); + val = kvm_pmu_get_pmc_value(pmc); reg = counter_index_to_reg(pmc->idx); @@ -219,11 +223,10 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) { unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); - struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; for_each_set_bit(i, &mask, 32) - kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); + kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i)); } /** @@ -234,10 +237,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) - kvm_pmu_release_perf_event(&pmu->pmc[i]); + kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i)); irq_work_sync(&vcpu->arch.pmu.overflow_work); } @@ -262,9 +264,6 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; - if (!kvm_vcpu_has_pmu(vcpu)) return; @@ -272,13 +271,15 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (!pmc->perf_event) { - kvm_pmu_create_perf_event(vcpu, i); + kvm_pmu_create_perf_event(pmc); } else { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) @@ -297,17 +298,17 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; if (!kvm_vcpu_has_pmu(vcpu) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); if (pmc->perf_event) perf_event_disable(pmc->perf_event); @@ -427,6 +428,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i); u64 type, reg; /* Filter on event type */ @@ -437,30 +439,30 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, /* Increment this counter */ reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; - if (!kvm_pmu_idx_is_64bit(vcpu, i)) + if (!kvm_pmc_is_64bit(pmc)) reg = lower_32_bits(reg); __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; /* No overflow? move on */ - if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg)) + if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) continue; /* Mark overflow */ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); - if (kvm_pmu_counter_can_chain(vcpu, i)) + if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(i + 1), ARMV8_PMUV3_PERFCTR_CHAIN); } } /* Compute the sample period for a given counter value */ -static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter) +static u64 compute_period(struct kvm_pmc *pmc, u64 counter) { u64 val; - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) { - if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) + if (kvm_pmc_is_64bit(pmc)) { + if (!kvm_pmc_has_64bit_overflow(pmc)) val = -(counter & GENMASK(31, 0)); else val = (-counter) & GENMASK(63, 0); @@ -490,7 +492,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, * Reset the sample period to the architectural limit, * i.e. the point where the counter overflows. */ - period = compute_period(vcpu, idx, local64_read(&perf_event->count)); + period = compute_period(pmc, local64_read(&perf_event->count)); local64_set(&perf_event->hw.period_left, 0); perf_event->attr.sample_period = period; @@ -498,7 +500,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); - if (kvm_pmu_counter_can_chain(vcpu, idx)) + if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(idx + 1), ARMV8_PMUV3_PERFCTR_CHAIN); @@ -551,34 +553,33 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) - kvm_pmu_set_counter(vcpu, i, 0, true); + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } } -static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && - (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx)); } /** * kvm_pmu_create_perf_event - create a perf event for a counter - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter + * @pmc: Counter context */ -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; struct perf_event *event; struct perf_event_attr attr; - u64 eventsel, counter, reg, data; + u64 eventsel, reg, data; - reg = counter_index_to_evtreg(select_idx); + reg = counter_index_to_evtreg(pmc->idx); data = __vcpu_sys_reg(vcpu, reg); - kvm_pmu_stop_counter(vcpu, pmc); + kvm_pmu_stop_counter(pmc); if (pmc->idx == ARMV8_PMU_CYCLE_IDX) eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; else @@ -604,24 +605,22 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) attr.type = arm_pmu->pmu.type; attr.size = sizeof(attr); attr.pinned = 1; - attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); + attr.disabled = !kvm_pmu_counter_is_enabled(pmc); attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; - counter = kvm_pmu_get_counter_value(vcpu, select_idx); - /* * If counting with a 64bit counter, advertise it to the perf * code, carefully dealing with the initial sample period * which also depends on the overflow. */ - if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) + if (kvm_pmc_is_64bit(pmc)) attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; - attr.sample_period = compute_period(vcpu, select_idx, counter); + attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc)); event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); @@ -648,6 +647,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx); u64 reg, mask; if (!kvm_vcpu_has_pmu(vcpu)) @@ -657,11 +657,11 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = counter_index_to_evtreg(select_idx); + reg = counter_index_to_evtreg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = data & mask; - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_create_perf_event(pmc); } void kvm_host_pmu_init(struct arm_pmu *pmu) From 86815735aa571d493cf5768cad5fa8e6fd9c7ba8 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 28 Nov 2022 19:26:29 +0530 Subject: [PATCH 17/20] KVM: arm64: PMU: Replace version number '0' with ID_AA64DFR0_EL1_PMUVer_NI kvm_host_pmu_init() returns when detected PMU is either not implemented, or implementation defined. kvm_pmu_probe_armpmu() also has a similar situation. Extracted ID_AA64DFR0_EL1_PMUVer value, when PMU is not implemented is '0', which can be replaced with ID_AA64DFR0_EL1_PMUVer_NI defined as '0b0000'. Cc: Arnaldo Carvalho de Melo Cc: Marc Zyngier Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221128135629.118346-1-anshuman.khandual@arm.com --- arch/arm64/kvm/pmu-emul.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 3295dea34f4c..bb7251e670a9 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -668,7 +668,8 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || + pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) return; mutex_lock(&arm_pmus_lock); @@ -721,7 +722,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) if (event->pmu) { pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver == 0 || + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) pmu = NULL; } From 292e8f1494764ac46dd1b7dd46fa317db691436c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Nov 2022 10:40:02 +0000 Subject: [PATCH 18/20] KVM: arm64: PMU: Simplify PMCR_EL0 reset handling Resetting PMCR_EL0 is a pretty involved process that includes poisoning some of the writable bits, just because we can. It makes it hard to reason about about what gets configured, and just resetting things to 0 seems like a much saner option. Reduce reset_pmcr() to just preserving PMCR_EL0.N from the host, and setting PMCR_EL0.LC if we don't support AArch32. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 67eac0f747be..eb56ad031116 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -639,24 +639,18 @@ static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 pmcr, val; + u64 pmcr; /* No PMU available, PMCR_EL0 may UNDEF... */ if (!kvm_arm_support_pmu_v3()) return; - pmcr = read_sysreg(pmcr_el0); - /* - * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN - * except PMCR.E resetting to zero. - */ - val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) - | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ + pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; if (!kvm_supports_32bit_el0()) - val |= ARMV8_PMU_PMCR_LC; - if (!kvm_pmu_is_3p5(vcpu)) - val &= ~ARMV8_PMU_PMCR_LP; - __vcpu_sys_reg(vcpu, r->reg) = val; + pmcr |= ARMV8_PMU_PMCR_LC; + + __vcpu_sys_reg(vcpu, r->reg) = pmcr; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) From 64d6820d64c0a206e744bd8945374d563a76c16c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 24 Nov 2022 10:44:59 +0000 Subject: [PATCH 19/20] KVM: arm64: PMU: Sanitise PMCR_EL0.LP on first vcpu run Userspace can play some dirty tricks on us by selecting a given PMU version (such as PMUv3p5), restore a PMCR_EL0 value that has PMCR_EL0.LP set, and then switch the PMU version to PMUv3p1, for example. In this situation, we end-up with PMCR_EL0.LP being set and spreading havoc in the PMU emulation. This is specially hard as the first two step can be done on one vcpu and the third step on another, meaning that we need to sanitise *all* vcpus when the PMU version is changed. In orer to avoid a pretty complicated locking situation, defer the sanitisation of PMCR_EL0 to the point where the vcpu is actually run for the first tine, using the existing KVM_REQ_RELOAD_PMU request that calls into kvm_pmu_handle_pmcr(). There is still an obscure corner case where userspace could do the above trick, and then save the VM without running it. They would then observe an inconsistent state (PMUv3.1 + LP set), but that state will be fixed on the first run anyway whenever the guest gets restored on a host. Reported-by: Reiji Watanabe Signed-off-by: Marc Zyngier --- arch/arm64/kvm/pmu-emul.c | 6 ++++++ arch/arm64/kvm/sys_regs.c | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index bb7251e670a9..d8ea39943086 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -538,6 +538,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; + /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */ + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; + + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; + if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index eb56ad031116..528d253c571a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -693,15 +693,15 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) { - /* Only update writeable bits of PMCR */ + /* + * Only update writeable bits of PMCR (continuing into + * kvm_pmu_handle_pmcr() as well) + */ val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - if (!kvm_pmu_is_3p5(vcpu)) - val &= ~ARMV8_PMU_PMCR_LP; - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); } else { From 58ff6569bc6ec369482eb2d132868870380be64c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Dec 2022 12:05:51 +0000 Subject: [PATCH 20/20] KVM: arm64: PMU: Fix period computation for 64bit counters with 32bit overflow Fix the bogus masking when computing the period of a 64bit counter with 32bit overflow. It really should be treated like a 32bit counter for the purpose of the period. Reported-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/Y4jbosgHbUDI0WF4@google.com --- arch/arm64/kvm/pmu-emul.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index d8ea39943086..24908400e190 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -461,14 +461,10 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter) { u64 val; - if (kvm_pmc_is_64bit(pmc)) { - if (!kvm_pmc_has_64bit_overflow(pmc)) - val = -(counter & GENMASK(31, 0)); - else - val = (-counter) & GENMASK(63, 0); - } else { + if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc)) + val = (-counter) & GENMASK(63, 0); + else val = (-counter) & GENMASK(31, 0); - } return val; }