Merge branch kvm-arm64/nv-pmu-fixes into kvmarm-master/next

* kvm-arm64/nv-pmu-fixes:
  : .
  : Fixes for NV PMU emulation. From the cover letter:
  :
  : "Joey reports that some of his PMU tests do not behave quite as
  : expected:
  :
  : - MDCR_EL2.HPMN is set to 0 out of reset
  :
  : - PMCR_EL0.P should reset all the counters when written from EL2
  :
  : Oliver points out that setting PMCR_EL0.N from userspace by writing to
  : the register is silly with NV, and that we need a new PMU attribute
  : instead.
  :
  : On top of that, I figured out that we had a number of little gotchas:
  :
  : - It is possible for a guest to write an HPMN value that is out of
  :   bound, and it seems valuable to limit it
  :
  : - PMCR_EL0.N should be the maximum number of counters when read from
  :   EL2, and MDCR_EL2.HPMN when read from EL0/EL1
  :
  : - Prevent userspace from updating PMCR_EL0.N when EL2 is available"
  : .
  KVM: arm64: Let kvm_vcpu_read_pmcr() return an EL-dependent value for PMCR_EL0.N
  KVM: arm64: Handle out-of-bound write to MDCR_EL2.HPMN
  KVM: arm64: Don't let userspace write to PMCR_EL0.N when the vcpu has EL2
  KVM: arm64: Allow userspace to limit the number of PMU counters for EL2 VMs
  KVM: arm64: Contextualise the handling of PMCR_EL0.P writes
  KVM: arm64: Fix MDCR_EL2.HPMN reset value
  KVM: arm64: Repaint pmcr_n into nr_pmu_counters

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2025-04-29 13:38:21 +01:00
commit 67bd641517
5 changed files with 116 additions and 23 deletions

View File

@ -137,6 +137,30 @@ exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting
hardare_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and
the cpu field to the processor id.
1.5 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS
--------------------------------------------------
:Parameters: in kvm_device_attr.addr the address to an unsigned int
representing the maximum value taken by PMCR_EL0.N
:Returns:
======= ====================================================
-EBUSY PMUv3 already initialized, a VCPU has already run or
an event filter has already been set
-EFAULT Error accessing the value pointed to by addr
-ENODEV PMUv3 not supported or GIC not initialized
-EINVAL No PMUv3 explicitly selected, or value of N out of
range
======= ====================================================
Set the number of implemented event counters in the virtual PMU. This
mandates that a PMU has explicitly been selected via
KVM_ARM_VCPU_PMU_V3_SET_PMU, and will fail when no PMU has been
explicitly selected, or the number of counters is out of range for the
selected PMU. Selecting a new PMU cancels the effect of setting this
attribute.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
=================================

View File

@ -359,8 +359,8 @@ struct kvm_arch {
cpumask_var_t supported_cpus;
/* PMCR_EL0.N value for the guest */
u8 pmcr_n;
/* Maximum number of counters for the guest */
u8 nr_pmu_counters;
/* Iterator for idreg debugfs */
u8 idreg_debugfs_iter;

View File

@ -431,10 +431,11 @@ enum {
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1

View File

@ -280,7 +280,7 @@ static u64 kvm_pmu_hyp_counter_mask(struct kvm_vcpu *vcpu)
return 0;
hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2));
n = vcpu->kvm->arch.pmcr_n;
n = vcpu->kvm->arch.nr_pmu_counters;
/*
* Programming HPMN to a value greater than PMCR_EL0.N is
@ -608,14 +608,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
if (val & ARMV8_PMU_PMCR_P) {
/*
* Unlike other PMU sysregs, the controls in PMCR_EL0 always apply
* to the 'guest' range of counters and never the 'hyp' range.
*/
unsigned long mask = kvm_pmu_implemented_counter_mask(vcpu) &
~kvm_pmu_hyp_counter_mask(vcpu) &
~BIT(ARMV8_PMU_CYCLE_IDX);
if (!vcpu_is_el2(vcpu))
mask &= ~kvm_pmu_hyp_counter_mask(vcpu);
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
}
@ -1027,12 +1025,30 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS);
}
static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr)
{
kvm->arch.nr_pmu_counters = nr;
/* Reset MDCR_EL2.HPMN behind the vcpus' back... */
if (test_bit(KVM_ARM_VCPU_HAS_EL2, kvm->arch.vcpu_features)) {
struct kvm_vcpu *vcpu;
unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2);
val &= ~MDCR_EL2_HPMN;
val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters);
__vcpu_sys_reg(vcpu, MDCR_EL2) = val;
}
}
}
static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
{
lockdep_assert_held(&kvm->arch.config_lock);
kvm->arch.arm_pmu = arm_pmu;
kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm);
kvm_arm_set_nr_counters(kvm, kvm_arm_pmu_get_max_counters(kvm));
}
/**
@ -1088,6 +1104,20 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
return ret;
}
static int kvm_arm_pmu_v3_set_nr_counters(struct kvm_vcpu *vcpu, unsigned int n)
{
struct kvm *kvm = vcpu->kvm;
if (!kvm->arch.arm_pmu)
return -EINVAL;
if (n > kvm_arm_pmu_get_max_counters(kvm))
return -EINVAL;
kvm_arm_set_nr_counters(kvm, n);
return 0;
}
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
struct kvm *kvm = vcpu->kvm;
@ -1184,6 +1214,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
}
case KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS: {
unsigned int __user *uaddr = (unsigned int __user *)(long)attr->addr;
unsigned int n;
if (get_user(n, uaddr))
return -EFAULT;
return kvm_arm_pmu_v3_set_nr_counters(vcpu, n);
}
case KVM_ARM_VCPU_PMU_V3_INIT:
return kvm_arm_pmu_v3_init(vcpu);
}
@ -1222,6 +1261,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_PMU_V3_INIT:
case KVM_ARM_VCPU_PMU_V3_FILTER:
case KVM_ARM_VCPU_PMU_V3_SET_PMU:
case KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS:
if (kvm_vcpu_has_pmu(vcpu))
return 0;
}
@ -1260,8 +1300,12 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
{
u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
u64 n = vcpu->kvm->arch.nr_pmu_counters;
return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
if (vcpu_has_nv(vcpu) && !vcpu_is_el2(vcpu))
n = FIELD_GET(MDCR_EL2_HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2));
return u64_replace_bits(pmcr, n, ARMV8_PMU_PMCR_N);
}
void kvm_pmu_nested_transition(struct kvm_vcpu *vcpu)

View File

@ -785,7 +785,7 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 mask = BIT(ARMV8_PMU_CYCLE_IDX);
u8 n = vcpu->kvm->arch.pmcr_n;
u8 n = vcpu->kvm->arch.nr_pmu_counters;
if (n)
mask |= GENMASK(n - 1, 0);
@ -1216,8 +1216,9 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
* with the existing KVM behavior.
*/
if (!kvm_vm_has_ran_once(kvm) &&
!vcpu_has_nv(vcpu) &&
new_n <= kvm_arm_pmu_get_max_counters(kvm))
kvm->arch.pmcr_n = new_n;
kvm->arch.nr_pmu_counters = new_n;
mutex_unlock(&kvm->arch.config_lock);
@ -2570,16 +2571,33 @@ static bool access_mdcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 old = __vcpu_sys_reg(vcpu, MDCR_EL2);
u64 hpmn, val, old = __vcpu_sys_reg(vcpu, MDCR_EL2);
if (!access_rw(vcpu, p, r))
return false;
if (!p->is_write) {
p->regval = old;
return true;
}
val = p->regval;
hpmn = FIELD_GET(MDCR_EL2_HPMN, val);
/*
* Request a reload of the PMU to enable/disable the counters affected
* by HPME.
* If HPMN is out of bounds, limit it to what we actually
* support. This matches the UNKNOWN definition of the field
* in that case, and keeps the emulation simple. Sort of.
*/
if ((old ^ __vcpu_sys_reg(vcpu, MDCR_EL2)) & MDCR_EL2_HPME)
if (hpmn > vcpu->kvm->arch.nr_pmu_counters) {
hpmn = vcpu->kvm->arch.nr_pmu_counters;
u64_replace_bits(val, hpmn, MDCR_EL2_HPMN);
}
__vcpu_sys_reg(vcpu, MDCR_EL2) = val;
/*
* Request a reload of the PMU to enable/disable the counters
* affected by HPME.
*/
if ((old ^ val) & MDCR_EL2_HPME)
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
return true;
@ -2698,6 +2716,12 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
.set_user = set_imp_id_reg, \
.reset = reset_imp_id_reg, \
.val = mask, \
}
static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
__vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters;
return vcpu->kvm->arch.nr_pmu_counters;
}
/*
@ -3243,7 +3267,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
EL2_REG(MDCR_EL2, access_mdcr, reset_val, 0),
EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),