From 7755cec63adeecea3cbbf4032047812c37cf7cc3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:20 -0400 Subject: [PATCH 01/24] arm64: perf: Move PMUv3 driver to drivers/perf Having the ARM PMUv3 driver sitting in arch/arm64/kernel is getting in the way of being able to use perf on ARMv8 cores running a 32bit kernel, such as 32bit KVM guests. This patch moves it into drivers/perf/arm_pmuv3.c, with an include file in include/linux/perf/arm_pmuv3.h. The only thing left in arch/arm64 is some mundane perf stuff. Signed-off-by: Marc Zyngier Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-2-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 249 ----------------- arch/arm64/kernel/Makefile | 1 - drivers/perf/Kconfig | 11 + drivers/perf/Makefile | 1 + .../perf_event.c => drivers/perf/arm_pmuv3.c | 1 + include/kvm/arm_pmu.h | 2 +- include/linux/perf/arm_pmuv3.h | 258 ++++++++++++++++++ 7 files changed, 272 insertions(+), 251 deletions(-) rename arch/arm64/kernel/perf_event.c => drivers/perf/arm_pmuv3.c (99%) create mode 100644 include/linux/perf/arm_pmuv3.h diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 3eaf462f5752..eb7071c9eb34 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -9,255 +9,6 @@ #include #include -#define ARMV8_PMU_MAX_COUNTERS 32 -#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) - -/* - * Common architectural and microarchitectural event numbers. - */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 -#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 -#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 -#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 -#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 -#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A -#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B -#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C -#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D -#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E -#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 -#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 -#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 -#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A -#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B -#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C -#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D -#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 -#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 -#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 -#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C -#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D -#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E -#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F -#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 -#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 -#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 -#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A -#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B -#define ARMV8_PMUV3_PERFCTR_STALL 0x003C -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F - -/* Statistical profiling extension microarchitectural events */ -#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 -#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001 -#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 -#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 - -/* AMUv1 architecture events */ -#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 -#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 - -/* long-latency read miss events */ -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B - -/* Trace buffer events */ -#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C -#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E - -/* Trace unit events */ -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 -#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A -#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B - -/* additional latency from alignment events */ -#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 -#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 -#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 - -/* Armv8.5 Memory Tagging Extension events */ -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 -#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 - -/* ARMv8 recommended implementation defined event types */ -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 - -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 - -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 - -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A - -#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C -#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D -#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E -#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F -#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 -#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 -#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 -#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 -#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 -#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 -#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 -#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 -#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 -#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 -#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A - -#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C -#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D -#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E - -#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 -#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 -#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 -#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 - -#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 -#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 -#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 - -#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F -#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 -#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 - -/* - * Per-CPU PMCR: config reg - */ -#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ -#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ -#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ -#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ -#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ -#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ -#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ - -/* - * PMOVSR: counters overflow flag status reg - */ -#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK - -/* - * PMXEVTYPER: Event selection reg - */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ -#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ - -/* - * Event filters for PMUv3 - */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) - -/* - * PMUSERENR: user enable reg - */ -#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ -#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ -#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ -#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ -#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ - -/* PMMIR_EL1.SLOTS mask */ -#define ARMV8_PMU_SLOTS_MASK 0xff - -#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 -#define ARMV8_PMU_BUS_SLOTS_MASK 0xff -#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 -#define ARMV8_PMU_BUS_WIDTH_MASK 0xf - #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index ceba6792f5b3..7c2bb4e72476 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 66c259000a44..defe6b47854b 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -100,6 +100,17 @@ config ARM_SMMU_V3_PMU through the SMMU and allow the resulting information to be filtered based on the Stream ID of the corresponding master. +config ARM_PMUV3 + depends on HW_PERF_EVENTS && ARM64 + bool "ARM PMUv3 support" if !ARM64 + default y + help + Say y if you want to use the ARM performance monitor unit (PMU) + version 3. The PMUv3 is the CPU performance monitors on ARMv8 + (aarch32 and aarch64) systems that implement the PMUv3 + architecture. + Currently, PMUv3 is only supported on aarch64 (arm64) + config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" depends on ARM64 diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 13e45da61100..dabc859540ce 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_CMN) += arm-cmn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o +obj-$(CONFIG_ARM_PMUV3) += arm_pmuv3.o obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o obj-$(CONFIG_HISI_PMU) += hisilicon/ diff --git a/arch/arm64/kernel/perf_event.c b/drivers/perf/arm_pmuv3.c similarity index 99% rename from arch/arm64/kernel/perf_event.c rename to drivers/perf/arm_pmuv3.c index dde06c0f97f3..c25203f8b940 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/drivers/perf/arm_pmuv3.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 628775334d5e..1a6a695ca67a 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -8,7 +8,7 @@ #define __ASM_ARM_KVM_PMU_H #include -#include +#include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h new file mode 100644 index 000000000000..53173abfc022 --- /dev/null +++ b/include/linux/perf/arm_pmuv3.h @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __PERF_ARM_PMUV3_H +#define __PERF_ARM_PMUV3_H + +#define ARMV8_PMU_MAX_COUNTERS 32 +#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) + +/* + * Common architectural and microarchitectural event numbers. + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 +#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 +#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B +#define ARMV8_PMUV3_PERFCTR_STALL 0x003C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F + +/* Statistical profiling extension microarchitectural events */ +#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 +#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001 +#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 +#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 + +/* AMUv1 architecture events */ +#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 +#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 + +/* long-latency read miss events */ +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B + +/* Trace buffer events */ +#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C +#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E + +/* Trace unit events */ +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B + +/* additional latency from alignment events */ +#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 +#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 +#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 + +/* Armv8.5 Memory Tagging Extension events */ +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 + +/* ARMv8 recommended implementation defined event types */ +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F +#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 + +/* + * Per-CPU PMCR: config reg + */ +#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ +#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */ +#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */ +#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ +#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ +#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ +#define ARMV8_PMU_PMCR_N_MASK 0x1f +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ + +/* + * PMOVSR: counters overflow flag status reg + */ +#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ +#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ + +/* + * Event filters for PMUv3 + */ +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) + +/* + * PMUSERENR: user enable reg + */ +#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ +#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ +#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ +#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ + +/* PMMIR_EL1.SLOTS mask */ +#define ARMV8_PMU_SLOTS_MASK 0xff + +#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 +#define ARMV8_PMU_BUS_SLOTS_MASK 0xff +#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 +#define ARMV8_PMU_BUS_WIDTH_MASK 0xf + +#endif From df29ddf4f04b00cf60669686dc7f534c3ed7c433 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:21 -0400 Subject: [PATCH 02/24] arm64: perf: Abstract system register accesses away As we want to enable 32bit support, we need to distanciate the PMUv3 driver from the AArch64 system register names. This patch moves all system register accesses to an architecture specific include file, allowing the 32bit counterpart to be slotted in at a later time. Signed-off-by: Marc Zyngier Co-developed-by: Zaid Al-Bassam Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-3-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 137 +++++++++++++++++++++++++++++ drivers/perf/arm_pmuv3.c | 115 +++++------------------- include/linux/perf/arm_pmuv3.h | 45 ++++++++++ 3 files changed, 205 insertions(+), 92 deletions(-) create mode 100644 arch/arm64/include/asm/arm_pmuv3.h diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..c444cbfb3acd --- /dev/null +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include +#include + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline unsigned long read_pmmir(void) +{ + return read_cpuid(PMMIR_EL1); +} + +static inline u32 read_pmuver(void) +{ + u64 dfr0 = read_sysreg(id_aa64dfr0_el1); + + return cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_EL1_PMUVer_SHIFT); +} + +static inline void write_pmcr(u32 val) +{ + write_sysreg(val, pmcr_el0); +} + +static inline u32 read_pmcr(void) +{ + return read_sysreg(pmcr_el0); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, pmselr_el0); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, pmccntr_el0); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +static inline void write_pmxevcntr(u32 val) +{ + write_sysreg(val, pmxevcntr_el0); +} + +static inline u32 read_pmxevcntr(void) +{ + return read_sysreg(pmxevcntr_el0); +} + +static inline void write_pmxevtyper(u32 val) +{ + write_sysreg(val, pmxevtyper_el0); +} + +static inline void write_pmcntenset(u32 val) +{ + write_sysreg(val, pmcntenset_el0); +} + +static inline void write_pmcntenclr(u32 val) +{ + write_sysreg(val, pmcntenclr_el0); +} + +static inline void write_pmintenset(u32 val) +{ + write_sysreg(val, pmintenset_el1); +} + +static inline void write_pmintenclr(u32 val) +{ + write_sysreg(val, pmintenclr_el1); +} + +static inline void write_pmccfiltr(u32 val) +{ + write_sysreg(val, pmccfiltr_el0); +} + +static inline void write_pmovsclr(u32 val) +{ + write_sysreg(val, pmovsclr_el0); +} + +static inline u32 read_pmovsclr(void) +{ + return read_sysreg(pmovsclr_el0); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, pmuserenr_el0); +} + +static inline u32 read_pmceid0(void) +{ + return read_sysreg(pmceid0_el0); +} + +static inline u32 read_pmceid1(void) +{ + return read_sysreg(pmceid1_el0); +} + +#endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index c25203f8b940..f783f068d612 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -25,6 +24,8 @@ #include #include +#include + /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -425,83 +426,16 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) -/* - * This code is really good - */ - -#define PMEVN_CASE(n, case_macro) \ - case n: case_macro(n); break - -#define PMEVN_SWITCH(x, case_macro) \ - do { \ - switch (x) { \ - PMEVN_CASE(0, case_macro); \ - PMEVN_CASE(1, case_macro); \ - PMEVN_CASE(2, case_macro); \ - PMEVN_CASE(3, case_macro); \ - PMEVN_CASE(4, case_macro); \ - PMEVN_CASE(5, case_macro); \ - PMEVN_CASE(6, case_macro); \ - PMEVN_CASE(7, case_macro); \ - PMEVN_CASE(8, case_macro); \ - PMEVN_CASE(9, case_macro); \ - PMEVN_CASE(10, case_macro); \ - PMEVN_CASE(11, case_macro); \ - PMEVN_CASE(12, case_macro); \ - PMEVN_CASE(13, case_macro); \ - PMEVN_CASE(14, case_macro); \ - PMEVN_CASE(15, case_macro); \ - PMEVN_CASE(16, case_macro); \ - PMEVN_CASE(17, case_macro); \ - PMEVN_CASE(18, case_macro); \ - PMEVN_CASE(19, case_macro); \ - PMEVN_CASE(20, case_macro); \ - PMEVN_CASE(21, case_macro); \ - PMEVN_CASE(22, case_macro); \ - PMEVN_CASE(23, case_macro); \ - PMEVN_CASE(24, case_macro); \ - PMEVN_CASE(25, case_macro); \ - PMEVN_CASE(26, case_macro); \ - PMEVN_CASE(27, case_macro); \ - PMEVN_CASE(28, case_macro); \ - PMEVN_CASE(29, case_macro); \ - PMEVN_CASE(30, case_macro); \ - default: WARN(1, "Invalid PMEV* index\n"); \ - } \ - } while (0) - -#define RETURN_READ_PMEVCNTRN(n) \ - return read_sysreg(pmevcntr##n##_el0) -static unsigned long read_pmevcntrn(int n) -{ - PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); - return 0; -} - -#define WRITE_PMEVCNTRN(n) \ - write_sysreg(val, pmevcntr##n##_el0) -static void write_pmevcntrn(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVCNTRN); -} - -#define WRITE_PMEVTYPERN(n) \ - write_sysreg(val, pmevtyper##n##_el0) -static void write_pmevtypern(int n, unsigned long val) -{ - PMEVN_SWITCH(n, WRITE_PMEVTYPERN); -} - static inline u32 armv8pmu_pmcr_read(void) { - return read_sysreg(pmcr_el0); + return read_pmcr(); } static inline void armv8pmu_pmcr_write(u32 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); - write_sysreg(val, pmcr_el0); + write_pmcr(val); } static inline int armv8pmu_has_overflowed(u32 pmovsr) @@ -576,7 +510,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) u64 value; if (idx == ARMV8_IDX_CYCLE_COUNTER) - value = read_sysreg(pmccntr_el0); + value = read_pmccntr(); else value = armv8pmu_read_hw_counter(event); @@ -611,7 +545,7 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) value = armv8pmu_bias_long_counter(event, value); if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(value, pmccntr_el0); + write_pmccntr(value); else armv8pmu_write_hw_counter(event, value); } @@ -642,7 +576,7 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx, chain_evt); } else { if (idx == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(hwc->config_base, pmccfiltr_el0); + write_pmccfiltr(hwc->config_base); else armv8pmu_write_evtype(idx, hwc->config_base); } @@ -665,7 +599,7 @@ static inline void armv8pmu_enable_counter(u32 mask) * enable the counter. * */ isb(); - write_sysreg(mask, pmcntenset_el0); + write_pmcntenset(mask); } static inline void armv8pmu_enable_event_counter(struct perf_event *event) @@ -682,7 +616,7 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) static inline void armv8pmu_disable_counter(u32 mask) { - write_sysreg(mask, pmcntenclr_el0); + write_pmcntenclr(mask); /* * Make sure the effects of disabling the counter are visible before we * start configuring the event. @@ -704,7 +638,7 @@ static inline void armv8pmu_disable_event_counter(struct perf_event *event) static inline void armv8pmu_enable_intens(u32 mask) { - write_sysreg(mask, pmintenset_el1); + write_pmintenset(mask); } static inline void armv8pmu_enable_event_irq(struct perf_event *event) @@ -715,10 +649,10 @@ static inline void armv8pmu_enable_event_irq(struct perf_event *event) static inline void armv8pmu_disable_intens(u32 mask) { - write_sysreg(mask, pmintenclr_el1); + write_pmintenclr(mask); isb(); /* Clear the overflow flag in case an interrupt is pending. */ - write_sysreg(mask, pmovsclr_el0); + write_pmovsclr(mask); isb(); } @@ -733,18 +667,18 @@ static inline u32 armv8pmu_getreset_flags(void) u32 value; /* Read */ - value = read_sysreg(pmovsclr_el0); + value = read_pmovsclr(); /* Write to clear flags */ value &= ARMV8_PMU_OVSR_MASK; - write_sysreg(value, pmovsclr_el0); + write_pmovsclr(value); return value; } static void armv8pmu_disable_user_access(void) { - write_sysreg(0, pmuserenr_el0); + write_pmuserenr(0); } static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) @@ -755,13 +689,13 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) /* Clear any unused counters to avoid leaking their contents */ for_each_clear_bit(i, cpuc->used_mask, cpu_pmu->num_events) { if (i == ARMV8_IDX_CYCLE_COUNTER) - write_sysreg(0, pmccntr_el0); + write_pmccntr(0); else armv8pmu_write_evcntr(i, 0); } - write_sysreg(0, pmuserenr_el0); - write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0); + write_pmuserenr(0); + write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); } static void armv8pmu_enable_event(struct perf_event *event) @@ -1145,14 +1079,11 @@ static void __armv8pmu_probe_pmu(void *info) { struct armv8pmu_probe_info *probe = info; struct arm_pmu *cpu_pmu = probe->pmu; - u64 dfr0; u64 pmceid_raw[2]; u32 pmceid[2]; int pmuver; - dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_unsigned_field(dfr0, - ID_AA64DFR0_EL1_PMUVer_SHIFT); + pmuver = read_pmuver(); if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || pmuver == ID_AA64DFR0_EL1_PMUVer_NI) return; @@ -1167,8 +1098,8 @@ static void __armv8pmu_probe_pmu(void *info) /* Add the CPU cycles counter */ cpu_pmu->num_events += 1; - pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0); - pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0); + pmceid[0] = pmceid_raw[0] = read_pmceid0(); + pmceid[1] = pmceid_raw[1] = read_pmceid1(); bitmap_from_arr32(cpu_pmu->pmceid_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); @@ -1179,9 +1110,9 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); - /* store PMMIR_EL1 register for sysfs */ + /* store PMMIR register for sysfs */ if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) - cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; } diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 53173abfc022..e3899bd77f5c 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -255,4 +255,49 @@ #define ARMV8_PMU_BUS_WIDTH_SHIFT 16 #define ARMV8_PMU_BUS_WIDTH_MASK 0xf +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + #endif From 711432770f78e5a9ae235a1a55decfb71993c958 Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:22 -0400 Subject: [PATCH 03/24] perf: pmuv3: Abstract PMU version checks The current PMU version definitions are available for arm64 only, As we want to add PMUv3 support to arm (32-bit), abstracts these definitions by using arch-specific helpers. Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-4-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 16 ++++++++++++++++ drivers/perf/arm_pmuv3.c | 7 +++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index c444cbfb3acd..80cdfa4c3e88 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -134,4 +134,20 @@ static inline u32 read_pmceid1(void) return read_sysreg(pmceid1_el0); } +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || + pmuver == ID_AA64DFR0_EL1_PMUVer_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5; +} + #endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index f783f068d612..f7d890af8cc1 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -392,7 +392,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5); + return (is_pmuv3p5(cpu_pmu->pmuver)); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) @@ -1084,8 +1084,7 @@ static void __armv8pmu_probe_pmu(void *info) int pmuver; pmuver = read_pmuver(); - if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || - pmuver == ID_AA64DFR0_EL1_PMUVer_NI) + if (!pmuv3_implemented(pmuver)) return; cpu_pmu->pmuver = pmuver; @@ -1111,7 +1110,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR register for sysfs */ - if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31))) + if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31))) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; From 11fba29a8a1f2fca08f301426b15c62d8a0b8040 Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:23 -0400 Subject: [PATCH 04/24] perf: pmuv3: Move inclusion of kvm_host.h to the arch-specific helper KVM host support is available only on arm64. By moving the inclusion of kvm_host.h to an arm64-specific file, the 32bit architecture will be able to implement dummy helpers. Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-5-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arm_pmuv3.h | 2 ++ drivers/perf/arm_pmuv3.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 80cdfa4c3e88..d6b51deb7bf0 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -6,6 +6,8 @@ #ifndef __ASM_PMUV3_H #define __ASM_PMUV3_H +#include + #include #include diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index f7d890af8cc1..2cab600af4fd 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include From b3a070869f39be4fad9ea4d99f2c8fab1fb7eead Mon Sep 17 00:00:00 2001 From: Zaid Al-Bassam Date: Fri, 17 Mar 2023 15:50:24 -0400 Subject: [PATCH 05/24] perf: pmuv3: Change GENMASK to GENMASK_ULL GENMASK macro uses "unsigned long" (32-bit wide on arm and 64-bit on arm64), This causes build issues when enabling PMUv3 on arm as it tries to access bits > 31. This patch switches the GENMASK to GENMASK_ULL, which uses "unsigned long long" (64-bit on both arm and arm64). Signed-off-by: Zaid Al-Bassam Acked-by: Marc Zyngier Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-6-zalbassam@google.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 2cab600af4fd..fc8ed3cd0330 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -489,7 +489,7 @@ static bool armv8pmu_event_needs_bias(struct perf_event *event) static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) { if (armv8pmu_event_needs_bias(event)) - value |= GENMASK(63, 32); + value |= GENMASK_ULL(63, 32); return value; } @@ -497,7 +497,7 @@ static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) { if (armv8pmu_event_needs_bias(event)) - value &= ~GENMASK(63, 32); + value &= ~GENMASK_ULL(63, 32); return value; } From 252309adc81f529d42c8c90a4965866ec82cb6ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:25 -0400 Subject: [PATCH 06/24] ARM: Make CONFIG_CPU_V7 valid for 32bit ARMv8 implementations ARMv8 is a superset of ARMv7, and all the ARMv8 features are discoverable with a set of ID registers. It means that we can use CPU_V7 to guard ARMv8 features at compile time. This commit simply amends the CPU_V7 configuration symbol comment to reflect that CPU_V7 also covers ARMv8. Signed-off-by: Marc Zyngier Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-7-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm/mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c5bbae86f725..be183ed1232d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -403,7 +403,7 @@ config CPU_V6K select CPU_THUMB_CAPABLE select CPU_TLB_V6 if MMU -# ARMv7 +# ARMv7 and ARMv8 architectures config CPU_V7 bool select CPU_32v6K From 009d6dc87a568db62290b8dc0a517b612217b6da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:26 -0400 Subject: [PATCH 07/24] ARM: perf: Allow the use of the PMUv3 driver on 32bit ARM The only thing stopping the PMUv3 driver from compiling on 32bit is the lack of defined system registers names and the handful of required helpers. This is easily solved by providing the sysreg accessors and updating the Kconfig entry. Signed-off-by: Marc Zyngier Co-developed-by: Zaid Al-Bassam Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-8-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm/include/asm/arm_pmuv3.h | 247 +++++++++++++++++++++++++++++++ drivers/perf/Kconfig | 5 +- drivers/perf/arm_pmuv3.c | 5 +- 3 files changed, 253 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/arm_pmuv3.h diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..78d3d4b82c6c --- /dev/null +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include +#include + +#define PMCCNTR __ACCESS_CP15_64(0, c9) + +#define PMCR __ACCESS_CP15(c9, 0, c12, 0) +#define PMCNTENSET __ACCESS_CP15(c9, 0, c12, 1) +#define PMCNTENCLR __ACCESS_CP15(c9, 0, c12, 2) +#define PMOVSR __ACCESS_CP15(c9, 0, c12, 3) +#define PMSELR __ACCESS_CP15(c9, 0, c12, 5) +#define PMCEID0 __ACCESS_CP15(c9, 0, c12, 6) +#define PMCEID1 __ACCESS_CP15(c9, 0, c12, 7) +#define PMXEVTYPER __ACCESS_CP15(c9, 0, c13, 1) +#define PMXEVCNTR __ACCESS_CP15(c9, 0, c13, 2) +#define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0) +#define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1) +#define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2) +#define PMMIR __ACCESS_CP15(c9, 0, c14, 6) +#define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7) + +#define PMEVCNTR0 __ACCESS_CP15(c14, 0, c8, 0) +#define PMEVCNTR1 __ACCESS_CP15(c14, 0, c8, 1) +#define PMEVCNTR2 __ACCESS_CP15(c14, 0, c8, 2) +#define PMEVCNTR3 __ACCESS_CP15(c14, 0, c8, 3) +#define PMEVCNTR4 __ACCESS_CP15(c14, 0, c8, 4) +#define PMEVCNTR5 __ACCESS_CP15(c14, 0, c8, 5) +#define PMEVCNTR6 __ACCESS_CP15(c14, 0, c8, 6) +#define PMEVCNTR7 __ACCESS_CP15(c14, 0, c8, 7) +#define PMEVCNTR8 __ACCESS_CP15(c14, 0, c9, 0) +#define PMEVCNTR9 __ACCESS_CP15(c14, 0, c9, 1) +#define PMEVCNTR10 __ACCESS_CP15(c14, 0, c9, 2) +#define PMEVCNTR11 __ACCESS_CP15(c14, 0, c9, 3) +#define PMEVCNTR12 __ACCESS_CP15(c14, 0, c9, 4) +#define PMEVCNTR13 __ACCESS_CP15(c14, 0, c9, 5) +#define PMEVCNTR14 __ACCESS_CP15(c14, 0, c9, 6) +#define PMEVCNTR15 __ACCESS_CP15(c14, 0, c9, 7) +#define PMEVCNTR16 __ACCESS_CP15(c14, 0, c10, 0) +#define PMEVCNTR17 __ACCESS_CP15(c14, 0, c10, 1) +#define PMEVCNTR18 __ACCESS_CP15(c14, 0, c10, 2) +#define PMEVCNTR19 __ACCESS_CP15(c14, 0, c10, 3) +#define PMEVCNTR20 __ACCESS_CP15(c14, 0, c10, 4) +#define PMEVCNTR21 __ACCESS_CP15(c14, 0, c10, 5) +#define PMEVCNTR22 __ACCESS_CP15(c14, 0, c10, 6) +#define PMEVCNTR23 __ACCESS_CP15(c14, 0, c10, 7) +#define PMEVCNTR24 __ACCESS_CP15(c14, 0, c11, 0) +#define PMEVCNTR25 __ACCESS_CP15(c14, 0, c11, 1) +#define PMEVCNTR26 __ACCESS_CP15(c14, 0, c11, 2) +#define PMEVCNTR27 __ACCESS_CP15(c14, 0, c11, 3) +#define PMEVCNTR28 __ACCESS_CP15(c14, 0, c11, 4) +#define PMEVCNTR29 __ACCESS_CP15(c14, 0, c11, 5) +#define PMEVCNTR30 __ACCESS_CP15(c14, 0, c11, 6) + +#define PMEVTYPER0 __ACCESS_CP15(c14, 0, c12, 0) +#define PMEVTYPER1 __ACCESS_CP15(c14, 0, c12, 1) +#define PMEVTYPER2 __ACCESS_CP15(c14, 0, c12, 2) +#define PMEVTYPER3 __ACCESS_CP15(c14, 0, c12, 3) +#define PMEVTYPER4 __ACCESS_CP15(c14, 0, c12, 4) +#define PMEVTYPER5 __ACCESS_CP15(c14, 0, c12, 5) +#define PMEVTYPER6 __ACCESS_CP15(c14, 0, c12, 6) +#define PMEVTYPER7 __ACCESS_CP15(c14, 0, c12, 7) +#define PMEVTYPER8 __ACCESS_CP15(c14, 0, c13, 0) +#define PMEVTYPER9 __ACCESS_CP15(c14, 0, c13, 1) +#define PMEVTYPER10 __ACCESS_CP15(c14, 0, c13, 2) +#define PMEVTYPER11 __ACCESS_CP15(c14, 0, c13, 3) +#define PMEVTYPER12 __ACCESS_CP15(c14, 0, c13, 4) +#define PMEVTYPER13 __ACCESS_CP15(c14, 0, c13, 5) +#define PMEVTYPER14 __ACCESS_CP15(c14, 0, c13, 6) +#define PMEVTYPER15 __ACCESS_CP15(c14, 0, c13, 7) +#define PMEVTYPER16 __ACCESS_CP15(c14, 0, c14, 0) +#define PMEVTYPER17 __ACCESS_CP15(c14, 0, c14, 1) +#define PMEVTYPER18 __ACCESS_CP15(c14, 0, c14, 2) +#define PMEVTYPER19 __ACCESS_CP15(c14, 0, c14, 3) +#define PMEVTYPER20 __ACCESS_CP15(c14, 0, c14, 4) +#define PMEVTYPER21 __ACCESS_CP15(c14, 0, c14, 5) +#define PMEVTYPER22 __ACCESS_CP15(c14, 0, c14, 6) +#define PMEVTYPER23 __ACCESS_CP15(c14, 0, c14, 7) +#define PMEVTYPER24 __ACCESS_CP15(c14, 0, c15, 0) +#define PMEVTYPER25 __ACCESS_CP15(c14, 0, c15, 1) +#define PMEVTYPER26 __ACCESS_CP15(c14, 0, c15, 2) +#define PMEVTYPER27 __ACCESS_CP15(c14, 0, c15, 3) +#define PMEVTYPER28 __ACCESS_CP15(c14, 0, c15, 4) +#define PMEVTYPER29 __ACCESS_CP15(c14, 0, c15, 5) +#define PMEVTYPER30 __ACCESS_CP15(c14, 0, c15, 6) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(PMEVCNTR##n) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, PMEVCNTR##n) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, PMEVTYPER##n) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline unsigned long read_pmmir(void) +{ + return read_sysreg(PMMIR); +} + +static inline u32 read_pmuver(void) +{ + /* PMUVers is not a signed field */ + u32 dfr0 = read_cpuid_ext(CPUID_EXT_DFR0); + + return (dfr0 >> 24) & 0xf; +} + +static inline void write_pmcr(u32 val) +{ + write_sysreg(val, PMCR); +} + +static inline u32 read_pmcr(void) +{ + return read_sysreg(PMCR); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, PMSELR); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, PMCCNTR); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(PMCCNTR); +} + +static inline void write_pmxevcntr(u32 val) +{ + write_sysreg(val, PMXEVCNTR); +} + +static inline u32 read_pmxevcntr(void) +{ + return read_sysreg(PMXEVCNTR); +} + +static inline void write_pmxevtyper(u32 val) +{ + write_sysreg(val, PMXEVTYPER); +} + +static inline void write_pmcntenset(u32 val) +{ + write_sysreg(val, PMCNTENSET); +} + +static inline void write_pmcntenclr(u32 val) +{ + write_sysreg(val, PMCNTENCLR); +} + +static inline void write_pmintenset(u32 val) +{ + write_sysreg(val, PMINTENSET); +} + +static inline void write_pmintenclr(u32 val) +{ + write_sysreg(val, PMINTENCLR); +} + +static inline void write_pmccfiltr(u32 val) +{ + write_sysreg(val, PMCCFILTR); +} + +static inline void write_pmovsclr(u32 val) +{ + write_sysreg(val, PMOVSR); +} + +static inline u32 read_pmovsclr(void) +{ + return read_sysreg(PMOVSR); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, PMUSERENR); +} + +static inline u32 read_pmceid0(void) +{ + return read_sysreg(PMCEID0); +} + +static inline u32 read_pmceid1(void) +{ + return read_sysreg(PMCEID1); +} + +static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} +static inline void kvm_clr_pmu_events(u32 clr) {} +static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) +{ + return false; +} + +/* PMU Version in DFR Register */ +#define ARMV8_PMU_DFR_VER_NI 0 +#define ARMV8_PMU_DFR_VER_V3P4 0x5 +#define ARMV8_PMU_DFR_VER_V3P5 0x6 +#define ARMV8_PMU_DFR_VER_IMP_DEF 0xF + +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ARMV8_PMU_DFR_VER_IMP_DEF || + pmuver == ARMV8_PMU_DFR_VER_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P5; +} + +#endif diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index defe6b47854b..711f82400086 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -101,15 +101,14 @@ config ARM_SMMU_V3_PMU based on the Stream ID of the corresponding master. config ARM_PMUV3 - depends on HW_PERF_EVENTS && ARM64 + depends on HW_PERF_EVENTS && ((ARM && CPU_V7) || ARM64) bool "ARM PMUv3 support" if !ARM64 - default y + default ARM64 help Say y if you want to use the ARM performance monitor unit (PMU) version 3. The PMUv3 is the CPU performance monitors on ARMv8 (aarch32 and aarch64) systems that implement the PMUv3 architecture. - Currently, PMUv3 is only supported on aarch64 (arm64) config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index fc8ed3cd0330..34ed0d5d7898 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -388,10 +388,13 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = { * We unconditionally enable ARMv8.5-PMU long event counter support * (64-bit events) where supported. Indicate if this arm_pmu has long * event counter support. + * + * On AArch32, long counters make no sense (you can't access the top + * bits), so we only enable this on AArch64. */ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) { - return (is_pmuv3p5(cpu_pmu->pmuver)); + return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver)); } static inline bool armv8pmu_event_has_user_read(struct perf_event *event) From 3b16f6268e660f15aed0bb97aefe87e893eb8882 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:27 -0400 Subject: [PATCH 08/24] ARM: mach-virt: Select PMUv3 driver by default Since 32bit guests are not unlikely to run on an ARMv8 host, let's select the PMUv3 driver, which allows the PMU to be used on such systems. Signed-off-by: Marc Zyngier Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-9-zalbassam@google.com Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e24a9820e12f..a5e5c0b09ff2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -401,6 +401,7 @@ config ARCH_VIRT select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI select ARM_PSCI + select ARM_PMUV3 if PERF_EVENTS select HAVE_ARM_ARCH_TIMER config ARCH_AIROHA From f87e9114b5e590c2c6658ca21d7b714ca240bdd0 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 1 Mar 2023 09:55:40 -0800 Subject: [PATCH 09/24] perf/arm-cmn: Move overlapping wp_combine field As eventid field was expanded to support new mesh versions, it started to overlap with wp_combine field. Move wp_combine to fix the issue. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230301175540.19891-1-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index c9689861be3f..9f2edc28d16e 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -166,7 +166,7 @@ #define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config) #define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) -#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) +#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(30, 27) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) /* Note that we don't yet support the tertiary match group on newer IPs */ From d7f4679dc8d1a5cec497dd9f1b315ce77ab8db28 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Mar 2023 10:30:17 +0800 Subject: [PATCH 10/24] perf: arm: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230315023017.35789-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 54aa4658fb36..5de06f9a4dd3 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -655,8 +655,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev) .attr_groups = dmc620_pmu_attr_groups, }; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - dmc620_pmu->base = devm_ioremap_resource(&pdev->dev, res); + dmc620_pmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(dmc620_pmu->base)) return PTR_ERR(dmc620_pmu->base); From 8540504c5136146fd5c867afd86bb4618c1ee61a Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 15 Mar 2023 10:31:08 +0800 Subject: [PATCH 11/24] perf: qcom: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Link: https://lore.kernel.org/r/20230315023108.36953-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/qcom_l3_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 346311a05460..2887edb4eb0b 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -763,8 +763,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; - memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0); - l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc); + l3pmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc); if (IS_ERR(l3pmu->regs)) return PTR_ERR(l3pmu->regs); From a64021d3726a096442d63a958ff0c49c12c3a1f4 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Fri, 17 Feb 2023 14:10:43 +0000 Subject: [PATCH 12/24] kbuild, drivers/perf: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230217141059.392471-9-nick.alcock@oracle.com Signed-off-by: Will Deacon --- drivers/perf/apple_m1_cpu_pmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 979a7c2b4f56..7123beeb992f 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -581,4 +581,3 @@ static struct platform_driver m1_pmu_driver = { }; module_platform_driver(m1_pmu_driver); -MODULE_LICENSE("GPL v2"); From bc12f344d5de246d0e2bd1ffcd5f950314e460d9 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 16 Feb 2023 14:34:03 +0800 Subject: [PATCH 13/24] drivers/perf: Use devm_platform_get_and_ioremap_resource() Convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Reviewed-by: Shuai Xue Link: https://lore.kernel.org/r/20230216063403.9753-1-yang.lee@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/alibaba_uncore_drw_pmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index a7689fecb49d..5c5be9fc1b15 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -656,8 +656,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev) drw_pmu->dev = &pdev->dev; platform_set_drvdata(pdev, drw_pmu); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - drw_pmu->cfg_base = devm_ioremap_resource(&pdev->dev, res); + drw_pmu->cfg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(drw_pmu->cfg_base)) return PTR_ERR(drw_pmu->cfg_base); From c61e5720f23273269cc67ffb2908cf9831c8ca9d Mon Sep 17 00:00:00 2001 From: Jiucheng Xu Date: Thu, 9 Feb 2023 19:54:01 +0800 Subject: [PATCH 14/24] perf/amlogic: Fix config1/config2 parsing issue The 3th argument of for_each_set_bit is incorrect, fix them. Fixes: 2016e2113d35 ("perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver") Signed-off-by: Jiucheng Xu Link: https://lore.kernel.org/r/20230209115403.521868-1-jiucheng.xu@amlogic.com Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index b84346dbac2c..0b24dee1ed3c 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -156,10 +156,14 @@ static int meson_ddr_perf_event_add(struct perf_event *event, int flags) u64 config2 = event->attr.config2; int i; - for_each_set_bit(i, (const unsigned long *)&config1, sizeof(config1)) + for_each_set_bit(i, + (const unsigned long *)&config1, + BITS_PER_TYPE(config1)) meson_ddr_set_axi_filter(event, i); - for_each_set_bit(i, (const unsigned long *)&config2, sizeof(config2)) + for_each_set_bit(i, + (const unsigned long *)&config2, + BITS_PER_TYPE(config2)) meson_ddr_set_axi_filter(event, i + 64); if (flags & PERF_EF_START) From 16e15834659e9f5c05b9f12da6e86d76165c60a3 Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Thu, 2 Mar 2023 14:57:01 -0600 Subject: [PATCH 15/24] perf: arm_cspmu: Fix variable dereference warning Fix warning message from smatch tool: | smatch warnings: | drivers/perf/arm_cspmu/arm_cspmu.c:1075 arm_cspmu_find_cpu_container() | warn: variable dereferenced before check 'cpu_dev' (see line 1073) Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202302191227.kc0V8fM7-lkp@intel.com/ Reviewed-by: Suzuki K Poulose Signed-off-by: Besar Wicaksono Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20230302205701.35323-1-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e31302ab7e37..a3f1c410b417 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1078,12 +1078,14 @@ static int arm_cspmu_request_irq(struct arm_cspmu *cspmu) static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid) { u32 acpi_uid; - struct device *cpu_dev = get_cpu_device(cpu); - struct acpi_device *acpi_dev = ACPI_COMPANION(cpu_dev); + struct device *cpu_dev; + struct acpi_device *acpi_dev; + cpu_dev = get_cpu_device(cpu); if (!cpu_dev) return -ENODEV; + acpi_dev = ACPI_COMPANION(cpu_dev); while (acpi_dev) { if (!strcmp(acpi_device_hid(acpi_dev), ACPI_PROCESSOR_CONTAINER_HID) && From 640a3b7a3d138cb2467b75a6830144fac1c26a81 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 14 Feb 2023 11:38:01 +0100 Subject: [PATCH 16/24] dt-bindings: arm-pmu: Add PMU compatible strings for Apple M2 cores The PMUs on the Apple M2 cores avalanche and blizzard CPU are compatible with M1 ones. As on M1 we don't know exactly what the counters count so use a distinct compatible for each micro-architecture. Apple's PMU counter description omits a counter for M2 so there is some variation on the interpretation of the counters. Signed-off-by: Janne Grunau Acked-by: Rob Herring Reviewed-by: Hector Martin Link: https://lore.kernel.org/r/20230214-apple_m2_pmu-v1-1-9c9213ab9b63@jannau.net Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index dbb6f3dc5ae5..e14358bf0b9c 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -20,6 +20,8 @@ properties: items: - enum: - apm,potenza-pmu + - apple,avalanche-pmu + - apple,blizzard-pmu - apple,firestorm-pmu - apple,icestorm-pmu - arm,armv8-pmuv3 # Only for s/w models From 7d0bfb7c997753ef88f4c3a29f3409c8cb052ab1 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 14 Feb 2023 11:38:02 +0100 Subject: [PATCH 17/24] drivers/perf: apple_m1: Add Apple M2 support The PMU itself is compatible with the one found on M1. We still know next to nothing about the counters so keep using CPU uarch specific compatibles/PMU names. Signed-off-by: Janne Grunau Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230214-apple_m2_pmu-v1-2-9c9213ab9b63@jannau.net Signed-off-by: Will Deacon --- drivers/perf/apple_m1_cpu_pmu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index 7123beeb992f..8574c6e58c83 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -559,7 +559,21 @@ static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu) return m1_pmu_init(cpu_pmu); } +static int m2_pmu_avalanche_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_avalanche_pmu"; + return m1_pmu_init(cpu_pmu); +} + +static int m2_pmu_blizzard_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_blizzard_pmu"; + return m1_pmu_init(cpu_pmu); +} + static const struct of_device_id m1_pmu_of_device_ids[] = { + { .compatible = "apple,avalanche-pmu", .data = m2_pmu_avalanche_init, }, + { .compatible = "apple,blizzard-pmu", .data = m2_pmu_blizzard_init, }, { .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, }, { .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, }, { }, From 4248d043e462bd43dbef60164d35b817d5664eb1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Mar 2023 18:52:14 +0100 Subject: [PATCH 18/24] Revert "ARM: mach-virt: Select PMUv3 driver by default" This reverts commit 3b16f6268e660f15aed0bb97aefe87e893eb8882. Selecting a Kconfig option that has its own set of dependencies tends to end badly, and in this case 'randconfig' builds blew up on 32-bit ARM where ARM_PMUV3 was being selecting with HW_PERF_EVENTS=n: | drivers/perf/arm_pmuv3.c:68:5: error: use of undeclared identifier 'DTLB' | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB, | ^ | fatal error: too many errors emitted, stopping now [-ferror-limit=] | 20 errors generated. | | Kconfig warnings: (for reference only) | WARNING: unmet direct dependencies detected for ARM_PMUV3 | Depends on [n]: PERF_EVENTS [=y] && HW_PERF_EVENTS [=n] && (ARM [=y] && CPU_V7 [=y] || ARM64) | Selected by [y]: | - ARCH_VIRT [=y] && ARCH_MULTI_V7 [=y] && PERF_EVENTS [=y] As suggested by Marc, just drop the 'select' clause altogether by reverting the patch which introduced it. Link: https://lore.kernel.org/r/202303281539.zzI4vpw1-lkp@intel.com Reported-by: kernel test robot Reported-by: Arnd Bergmann Suggested-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a5e5c0b09ff2..e24a9820e12f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -401,7 +401,6 @@ config ARCH_VIRT select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI select ARM_PSCI - select ARM_PMUV3 if PERF_EVENTS select HAVE_ARM_ARCH_TIMER config ARCH_AIROHA From 23b2fd83948952ffee2e96ce6a982be9d8e96f9f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 3 Apr 2023 12:49:05 +0100 Subject: [PATCH 19/24] perf/arm-cmn: Validate cycles events fully DTC cycle count events don't have anything to validate or initialise in themselves, but we should not forget to still validate their whole group context. Otherwise, we may fail to correctly reject a contrived group containing an impossible number of cycles events. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/3124e8c276a1f513c1a415dc839ca4181b3c8bc8.1680522545.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 9f2edc28d16e..5a57c3f10d27 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1546,7 +1546,7 @@ static int arm_cmn_event_init(struct perf_event *event) type = CMN_EVENT_TYPE(event); /* DTC events (i.e. cycles) already have everything they need */ if (type == CMN_TYPE_DTC) - return 0; + return arm_cmn_validate_group(cmn, event); eventid = CMN_EVENT_EVENTID(event); /* For watchpoints we need the actual XP node here */ From a30b87e6bd7db112deb1e3014b23fb90111bc6c4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Apr 2023 11:38:09 +0200 Subject: [PATCH 20/24] arm64: pmuv3: dynamically map PERF_COUNT_HW_BRANCH_INSTRUCTIONS The mapping of perf_events generic hardware events to actual PMU events on ARM PMUv3 may not always be correct. This is in particular true for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event. Although the mapping points to an architected event, it may not always be available. This can be seen with a simple: $ perf stat -e branches sleep 0 Performance counter stats for 'sleep 0': branches 0.001401081 seconds time elapsed Yet the hardware does have an event that could be used for branches. Dynamically check for a supported hardware event which can be used for PERF_COUNT_HW_BRANCH_INSTRUCTIONS at mapping time. And with that: $ perf stat -e branches sleep 0 Performance counter stats for 'sleep 0': 166,739 branches 0.000832163 seconds time elapsed Co-developed-by: Stephane Eranian Signed-off-by: Stephane Eranian Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Co-developed-by: Peter Newman Signed-off-by: Peter Newman Link: https://lore.kernel.org/all/YvunKCJHSXKz%2FkZB@FVFF77S0Q05N Link: https://lore.kernel.org/r/20230411093809.657501-1-peternewman@google.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 34ed0d5d7898..c98e4039386d 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -46,7 +46,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, @@ -985,6 +984,28 @@ static void armv8pmu_reset(void *info) armv8pmu_pmcr_write(pmcr); } +static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, + struct perf_event *event) +{ + if (event->attr.type == PERF_TYPE_HARDWARE && + event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) { + + if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, + armpmu->pmceid_bitmap)) + return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED; + + if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED, + armpmu->pmceid_bitmap)) + return ARMV8_PMUV3_PERFCTR_BR_RETIRED; + + return HW_OP_UNSUPPORTED; + } + + return armpmu_map_event(event, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + static int __armv8_pmuv3_map_event(struct perf_event *event, const unsigned (*extra_event_map) [PERF_COUNT_HW_MAX], @@ -996,9 +1017,7 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event); /* * CHAIN events only work when paired with an adjacent counter, and it From 2ad91e44e6b0c7ef1ed151b3bb2242a2144e6085 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 12 Apr 2023 11:29:40 +0100 Subject: [PATCH 21/24] perf/arm-cmn: Fix port detection for CMN-700 When the "extra device ports" configuration was first added, the additional mxp_device_port_connect_info registers were added around the existing mxp_mesh_port_connect_info registers. What I missed about CMN-700 is that it shuffled them around to remove this discontinuity. As such, tweak the definitions and factor out a helper for reading these registers so we can deal with this discrepancy easily, which does at least allow nicely tidying up the callsites. With this we can then also do the nice thing and skip accesses completely rather than relying on RES0 behaviour where we know the extra registers aren't defined. Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Reported-by: Jing Zhang Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/71d129241d4d7923cde72a0e5b4c8d2f6084525f.1681295193.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 55 ++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 5a57c3f10d27..47d359f72957 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -57,14 +57,12 @@ #define CMN_INFO_REQ_VC_NUM GENMASK_ULL(1, 0) /* XPs also have some local topology info which has uses too */ -#define CMN_MXP__CONNECT_INFO_P0 0x0008 -#define CMN_MXP__CONNECT_INFO_P1 0x0010 -#define CMN_MXP__CONNECT_INFO_P2 0x0028 -#define CMN_MXP__CONNECT_INFO_P3 0x0030 -#define CMN_MXP__CONNECT_INFO_P4 0x0038 -#define CMN_MXP__CONNECT_INFO_P5 0x0040 +#define CMN_MXP__CONNECT_INFO(p) (0x0008 + 8 * (p)) #define CMN__CONNECT_INFO_DEVICE_TYPE GENMASK_ULL(4, 0) +#define CMN_MAX_PORTS 6 +#define CI700_CONNECT_INFO_P2_5_OFFSET 0x10 + /* PMU registers occupy the 3rd 4KB page of each node's region */ #define CMN_PMU_OFFSET 0x2000 @@ -396,6 +394,25 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } +static u32 arm_cmn_device_connect_info(const struct arm_cmn *cmn, + const struct arm_cmn_node *xp, int port) +{ + int offset = CMN_MXP__CONNECT_INFO(port); + + if (port >= 2) { + if (cmn->model & (CMN600 | CMN650)) + return 0; + /* + * CI-700 may have extra ports, but still has the + * mesh_port_connect_info registers in the way. + */ + if (cmn->model == CI700) + offset += CI700_CONNECT_INFO_P2_5_OFFSET; + } + + return readl_relaxed(xp->pmu_base - CMN_PMU_OFFSET + offset); +} + static struct dentry *arm_cmn_debugfs; #ifdef CONFIG_DEBUG_FS @@ -469,7 +486,7 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) y = cmn->mesh_y; while (y--) { int xp_base = cmn->mesh_x * y; - u8 port[6][CMN_MAX_DIMENSION]; + u8 port[CMN_MAX_PORTS][CMN_MAX_DIMENSION]; for (x = 0; x < cmn->mesh_x; x++) seq_puts(s, "--------+"); @@ -477,14 +494,9 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_printf(s, "\n%d |", y); for (x = 0; x < cmn->mesh_x; x++) { struct arm_cmn_node *xp = cmn->xps + xp_base + x; - void __iomem *base = xp->pmu_base - CMN_PMU_OFFSET; - port[0][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P0); - port[1][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P1); - port[2][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P2); - port[3][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P3); - port[4][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P4); - port[5][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P5); + for (p = 0; p < CMN_MAX_PORTS; p++) + port[p][x] = arm_cmn_device_connect_info(cmn, xp, p); seq_printf(s, " XP #%-2d |", xp_base + x); } @@ -2083,18 +2095,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) * from this, since in that case we will see at least one XP * with port 2 connected, for the HN-D. */ - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P0)) - xp_ports |= BIT(0); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P1)) - xp_ports |= BIT(1); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P2)) - xp_ports |= BIT(2); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P3)) - xp_ports |= BIT(3); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P4)) - xp_ports |= BIT(4); - if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P5)) - xp_ports |= BIT(5); + for (int p = 0; p < CMN_MAX_PORTS; p++) + if (arm_cmn_device_connect_info(cmn, xp, p)) + xp_ports |= BIT(p); if (cmn->multi_dtm && (xp_ports & 0xc)) arm_cmn_init_dtm(dtm++, xp, 1); From 25d8c25025a46e7621edde2eb6d5f55c6d29ee86 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 3 Apr 2023 16:14:22 +0800 Subject: [PATCH 22/24] drivers/perf: hisi: Remove redundant initialized of pmu->name "pmu->name" is initialized by perf_pmu_register() function, so remove the redundant initialized in hisi_pmu_init(). Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230403081423.62460-2-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 +--- drivers/perf/hisilicon/hisi_uncore_pmu.h | 3 +-- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +- 8 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 4c67d57217a7..40f1bc9f9b91 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -316,7 +316,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (!name) return -ENOMEM; - hisi_pmu_init(cpa_pmu, name, THIS_MODULE); + hisi_pmu_init(cpa_pmu, THIS_MODULE); /* Power Management should be disabled before using CPA PMU. */ hisi_cpa_pmu_disable_pm(cpa_pmu); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 8c3ffcbfd4c0..8a3d74ddcd6d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -516,7 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - hisi_pmu_init(ddrc_pmu, name, THIS_MODULE); + hisi_pmu_init(ddrc_pmu, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 806698b9eabf..5701a84edb0e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -519,7 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", hha_pmu->sccl_id, hha_pmu->index_id); - hisi_pmu_init(hha_pmu, name, THIS_MODULE); + hisi_pmu_init(hha_pmu, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 5b2c35f1658a..68596b566344 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -557,7 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", l3c_pmu->sccl_id, l3c_pmu->ccl_id); - hisi_pmu_init(l3c_pmu, name, THIS_MODULE); + hisi_pmu_init(l3c_pmu, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index afe3419f3f6d..71b6687d6696 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -412,7 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(pa_pmu, name, THIS_MODULE); + hisi_pmu_init(pa_pmu, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index f1b0f5e1a28f..2823f381930d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -531,12 +531,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module) +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; - pmu->name = name; pmu->module = module; pmu->task_ctx_nr = perf_invalid_context; pmu->event_init = hisi_uncore_pmu_event_init; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index f8e3cc6903d7..07890a8e96ca 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -121,6 +121,5 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module); +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 1e354433776a..6fe534a665ed 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -445,7 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(sllc_pmu, name, THIS_MODULE); + hisi_pmu_init(sllc_pmu, THIS_MODULE); ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { From 257aedb72e731082ab514058e57b132f0b29d707 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 3 Apr 2023 16:14:23 +0800 Subject: [PATCH 23/24] drivers/perf: hisi: add NULL check for name When allocations fails that can be NULL now. If the name provided is NULL, then the initialization process of the PMU type and dev will be skipped in function perf_pmu_register(). Consequently, the PMU will not be able to register into the kernel. Moreover, in the case of unregister the PMU, the function device_del() will need to handle NULL pointers, which potentially can cause issues. So move this allocation above the cpuhp_state_add_instance() and directly return if it does fail. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230403081423.62460-3-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 17 ++++++++++------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 7 +++++-- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 11 +++++------ 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 8a3d74ddcd6d..ffb039d05d07 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -499,13 +499,6 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, - &ddrc_pmu->node); - if (ret) { - dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); - return ret; - } - if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u_%u", @@ -516,6 +509,16 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, + &ddrc_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug;\n", ret); + return ret; + } + hisi_pmu_init(ddrc_pmu, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 5701a84edb0e..15caf99e1eef 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -510,6 +510,11 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", + hha_pmu->sccl_id, hha_pmu->index_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); if (ret) { @@ -517,8 +522,6 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); hisi_pmu_init(hha_pmu, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 68596b566344..794dbcd19b7a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -544,6 +544,11 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", + l3c_pmu->sccl_id, l3c_pmu->ccl_id); + if (!name) + return -ENOMEM; + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); if (ret) { @@ -551,12 +556,6 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } - /* - * CCL_ID is used to identify the L3C in the same SCCL which was - * used _UID by mistake. - */ - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); hisi_pmu_init(l3c_pmu, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); From 87727ba2bb05cc3cb4233231faa7ab4c7eeb6c73 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 20 Apr 2023 13:33:56 +0100 Subject: [PATCH 24/24] KVM: arm64: Ensure CPU PMU probes before pKVM host de-privilege Although pKVM supports CPU PMU emulation for non-protected guests since 722625c6f4c5 ("KVM: arm64: Reenable pmu in Protected Mode"), this relies on the PMU driver probing before the host has de-privileged so that the 'kvm_arm_pmu_available' static key can still be enabled by patching the hypervisor text. As it happens, both of these events hang off device_initcall() but the PMU consistently won the race until 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf"). Since then, the host will fail to boot when pKVM is enabled: | hw perfevents: enabled with armv8_pmuv3_0 PMU driver, 7 counters available | kvm [1]: nVHE hyp BUG at: [] __kvm_nvhe_handle_host_mem_abort+0x270/0x284! | kvm [1]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE | kvm [1]: Hyp Offset: 0xfffea41fbdf70000 | Kernel panic - not syncing: HYP panic: | PS:a00003c9 PC:0000dbe04b0c66e0 ESR:00000000f2000800 | FAR:fffffbfffddfcf00 HPFAR:00000000010b0bf0 PAR:0000000000000000 | VCPU:0000000000000000 | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc7-00083-g0bce6746d154 #1 | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 | Call trace: | dump_backtrace+0xec/0x108 | show_stack+0x18/0x2c | dump_stack_lvl+0x50/0x68 | dump_stack+0x18/0x24 | panic+0x13c/0x33c | nvhe_hyp_panic_handler+0x10c/0x190 | aarch64_insn_patch_text_nosync+0x64/0xc8 | arch_jump_label_transform+0x4c/0x5c | __jump_label_update+0x84/0xfc | jump_label_update+0x100/0x134 | static_key_enable_cpuslocked+0x68/0xac | static_key_enable+0x20/0x34 | kvm_host_pmu_init+0x88/0xa4 | armpmu_register+0xf0/0xf4 | arm_pmu_acpi_probe+0x2ec/0x368 | armv8_pmu_driver_init+0x38/0x44 | do_one_initcall+0xcc/0x240 Fix the race properly by deferring the de-privilege step to device_initcall_sync(). This will also be needed in future when probing IOMMU devices and allows us to separate the pKVM de-privilege logic from the core hypervisor initialisation path. Cc: Oliver Upton Cc: Fuad Tabba Cc: Marc Zyngier Fixes: 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf") Tested-by: Fuad Tabba Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20230420123356.2708-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kvm/arm.c | 45 ----------------------------------------- arch/arm64/kvm/pkvm.c | 47 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..890f730bc3ab 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -46,7 +45,6 @@ #include static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; -DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); @@ -2105,41 +2103,6 @@ static int __init init_hyp_mode(void) return err; } -static void __init _kvm_host_prot_finalize(void *arg) -{ - int *err = arg; - - if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) - WRITE_ONCE(*err, -EINVAL); -} - -static int __init pkvm_drop_host_privileges(void) -{ - int ret = 0; - - /* - * Flip the static key upfront as that may no longer be possible - * once the host stage 2 is installed. - */ - static_branch_enable(&kvm_protected_mode_initialized); - on_each_cpu(_kvm_host_prot_finalize, &ret, 1); - return ret; -} - -static int __init finalize_hyp_mode(void) -{ - if (!is_protected_kvm_enabled()) - return 0; - - /* - * Exclude HYP sections from kmemleak so that they don't get peeked - * at, which would end badly once inaccessible. - */ - kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); - kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size); - return pkvm_drop_host_privileges(); -} - struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) { struct kvm_vcpu *vcpu; @@ -2257,14 +2220,6 @@ static __init int kvm_arm_init(void) if (err) goto out_hyp; - if (!in_hyp_mode) { - err = finalize_hyp_mode(); - if (err) { - kvm_err("Failed to finalize Hyp protection\n"); - goto out_subs; - } - } - if (is_protected_kvm_enabled()) { kvm_info("Protected nVHE mode initialized successfully\n"); } else if (in_hyp_mode) { diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index cf56958b1492..6e9ece1ebbe7 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -4,6 +4,8 @@ * Author: Quentin Perret */ +#include +#include #include #include #include @@ -13,6 +15,8 @@ #include "hyp_constants.h" +DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); + static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); @@ -213,3 +217,46 @@ int pkvm_init_host_vm(struct kvm *host_kvm) mutex_init(&host_kvm->lock); return 0; } + +static void __init _kvm_host_prot_finalize(void *arg) +{ + int *err = arg; + + if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) + WRITE_ONCE(*err, -EINVAL); +} + +static int __init pkvm_drop_host_privileges(void) +{ + int ret = 0; + + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ + static_branch_enable(&kvm_protected_mode_initialized); + on_each_cpu(_kvm_host_prot_finalize, &ret, 1); + return ret; +} + +static int __init finalize_pkvm(void) +{ + int ret; + + if (!is_protected_kvm_enabled()) + return 0; + + /* + * Exclude HYP sections from kmemleak so that they don't get peeked + * at, which would end badly once inaccessible. + */ + kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size); + + ret = pkvm_drop_host_privileges(); + if (ret) + pr_err("Failed to finalize Hyp protection: %d\n", ret); + + return ret; +} +device_initcall_sync(finalize_pkvm);