From 56c7c6eaf3eb8ac1ec40d56096c0f2b27250da5f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:50 +0000 Subject: [PATCH 01/14] perf/arm-cmn: Fix CPU hotplug unregistration Attempting to migrate the PMU context after we've unregistered the PMU device, or especially if we never successfully registered it in the first place, is a woefully bad idea. It's also fundamentally pointless anyway. Make sure to unregister an instance from the hotplug handler *without* invoking the teardown callback. Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/2c221d745544774e4b07583b65b5d4d94f7e0fe4.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index bc3cba5f8c5d..400eb7f579dc 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1561,7 +1561,8 @@ static int arm_cmn_probe(struct platform_device *pdev) err = perf_pmu_register(&cmn->pmu, name, -1); if (err) - cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node); + return err; } @@ -1572,7 +1573,7 @@ static int arm_cmn_remove(struct platform_device *pdev) writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL); perf_pmu_unregister(&cmn->pmu); - cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node); return 0; } From 6190741c294d1cad15198d5d2f912868434fa492 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:51 +0000 Subject: [PATCH 02/14] perf/arm-cmn: Account for NUMA affinity On a system with multiple CMN meshes, ideally we'd want to access each PMU from within its own mesh, rather than with a long CML round-trip, wherever feasible. Since such a system is likely to be presented as multiple NUMA nodes, let's also hope a proximity domain is specified for each CMN programming interface, and use that to guide our choice of IRQ affinity to favour a node-local CPU where possible. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/32438b0d016e0649d882d47d30ac2000484287b9.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 51 +++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 400eb7f579dc..02b898dbba91 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1147,23 +1147,47 @@ static int arm_cmn_commit_txn(struct pmu *pmu) return 0; } -static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +static void arm_cmn_migrate(struct arm_cmn *cmn, unsigned int cpu) +{ + unsigned int i; + + perf_pmu_migrate_context(&cmn->pmu, cmn->cpu, cpu); + for (i = 0; i < cmn->num_dtcs; i++) + irq_set_affinity(cmn->dtc[i].irq, cpumask_of(cpu)); + cmn->cpu = cpu; +} + +static int arm_cmn_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) { struct arm_cmn *cmn; - unsigned int i, target; + int node; - cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node); + cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node); + node = dev_to_node(cmn->dev); + if (node != NUMA_NO_NODE && cpu_to_node(cmn->cpu) != node && cpu_to_node(cpu) == node) + arm_cmn_migrate(cmn, cpu); + return 0; +} + +static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) +{ + struct arm_cmn *cmn; + unsigned int target; + int node; + cpumask_t mask; + + cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node); if (cpu != cmn->cpu) return 0; - target = cpumask_any_but(cpu_online_mask, cpu); - if (target >= nr_cpu_ids) - return 0; - - perf_pmu_migrate_context(&cmn->pmu, cpu, target); - for (i = 0; i < cmn->num_dtcs; i++) - irq_set_affinity(cmn->dtc[i].irq, cpumask_of(target)); - cmn->cpu = target; + node = dev_to_node(cmn->dev); + if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && + cpumask_andnot(&mask, &mask, cpumask_of(cpu))) + target = cpumask_any(&mask); + else + target = cpumask_any_but(cpu_online_mask, cpu); + if (target < nr_cpu_ids) + arm_cmn_migrate(cmn, target); return 0; } @@ -1532,7 +1556,7 @@ static int arm_cmn_probe(struct platform_device *pdev) if (err) return err; - cmn->cpu = raw_smp_processor_id(); + cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); cmn->pmu = (struct pmu) { .module = THIS_MODULE, .attr_groups = arm_cmn_attr_groups, @@ -1608,7 +1632,8 @@ static int __init arm_cmn_init(void) int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "perf/arm/cmn:online", NULL, + "perf/arm/cmn:online", + arm_cmn_pmu_online_cpu, arm_cmn_pmu_offline_cpu); if (ret < 0) return ret; From 82d8ea4b450074e81748830929bbd94eebbaffea Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:52 +0000 Subject: [PATCH 03/14] perf/arm-cmn: Drop compile-test restriction Although CMN is currently (and overwhelmingly likely to remain) deployed in arm64-only (modulo userspace) systems, the 64-bit "dependency" for compile-testing was just laziness due to heavy reliance on readq/writeq accessors. Since we only need one extra include for robustness in that regard, let's pull that in, widen the compile-test coverage, and fix up the smattering of type laziness that that brings to light. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/baee9ee0d0bdad8aaeb70f5a4b98d8fd4b1f5786.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- drivers/perf/arm-cmn.c | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 4374af292e6d..1070515d340c 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -43,7 +43,7 @@ config ARM_CCN config ARM_CMN tristate "Arm CMN-600 PMU support" - depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on ARM64 || COMPILE_TEST help Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh Network interconnect. diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 02b898dbba91..1d52fcfe3a0d 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -122,11 +123,11 @@ /* Event attributes */ -#define CMN_CONFIG_TYPE GENMASK(15, 0) -#define CMN_CONFIG_EVENTID GENMASK(23, 16) -#define CMN_CONFIG_OCCUPID GENMASK(27, 24) -#define CMN_CONFIG_BYNODEID BIT(31) -#define CMN_CONFIG_NODEID GENMASK(47, 32) +#define CMN_CONFIG_TYPE GENMASK_ULL(15, 0) +#define CMN_CONFIG_EVENTID GENMASK_ULL(23, 16) +#define CMN_CONFIG_OCCUPID GENMASK_ULL(27, 24) +#define CMN_CONFIG_BYNODEID BIT_ULL(31) +#define CMN_CONFIG_NODEID GENMASK_ULL(47, 32) #define CMN_EVENT_TYPE(event) FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config) #define CMN_EVENT_EVENTID(event) FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config) @@ -134,13 +135,13 @@ #define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config) #define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) -#define CMN_CONFIG_WP_COMBINE GENMASK(27, 24) -#define CMN_CONFIG_WP_DEV_SEL BIT(48) -#define CMN_CONFIG_WP_CHN_SEL GENMASK(50, 49) -#define CMN_CONFIG_WP_GRP BIT(52) -#define CMN_CONFIG_WP_EXCLUSIVE BIT(53) -#define CMN_CONFIG1_WP_VAL GENMASK(63, 0) -#define CMN_CONFIG2_WP_MASK GENMASK(63, 0) +#define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) +#define CMN_CONFIG_WP_DEV_SEL BIT_ULL(48) +#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(50, 49) +#define CMN_CONFIG_WP_GRP BIT_ULL(52) +#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(53) +#define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0) +#define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0) #define CMN_EVENT_WP_COMBINE(event) FIELD_GET(CMN_CONFIG_WP_COMBINE, (event)->attr.config) #define CMN_EVENT_WP_DEV_SEL(event) FIELD_GET(CMN_CONFIG_WP_DEV_SEL, (event)->attr.config) From 5f167eab83f153c2c6f80cfe419e269d5f481b09 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:53 +0000 Subject: [PATCH 04/14] perf/arm-cmn: Refactor node ID handling Add a bit more abstraction for the places where we decompose node IDs. This will help keep things nice and manageable when we come to add yet more variables which affect the node ID format. Also use the opportunity to move the rest of the low-level node management helpers back up to the logical place they were meant to be - how they ended up buried right in the middle of the event-related definitions is somewhat of a mystery... Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/a2242a8c3c96056c13a04ae87bf2047e5e64d2d9.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 94 +++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 1d52fcfe3a0d..adf50d613734 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -255,6 +255,58 @@ struct arm_cmn { static int arm_cmn_hp_state; +struct arm_cmn_nodeid { + u8 x; + u8 y; + u8 port; + u8 dev; +}; + +static int arm_cmn_xyidbits(const struct arm_cmn *cmn) +{ + int dim = max(cmn->mesh_x, cmn->mesh_y); + + return dim > 4 ? 3 : 2; +} + +static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id) +{ + struct arm_cmn_nodeid nid; + int bits = arm_cmn_xyidbits(cmn); + + nid.x = CMN_NODEID_X(id, bits); + nid.y = CMN_NODEID_Y(id, bits); + nid.port = CMN_NODEID_PID(id); + nid.dev = CMN_NODEID_DEVID(id); + + return nid; +} + +static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn, + struct arm_cmn_node *dn) +{ + struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); + int xp_idx = cmn->mesh_x * nid.y + nid.x; + + dn->to_xp = (cmn->xps + xp_idx) - dn; +} + +static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn) +{ + return dn->type == CMN_TYPE_XP ? dn : dn + dn->to_xp; +} + +static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, + enum cmn_node_type type) +{ + int i; + + for (i = 0; i < cmn->num_dns; i++) + if (cmn->dns[i].type == type) + return &cmn->dns[i]; + return NULL; +} + struct arm_cmn_hw_event { struct arm_cmn_node *dn; u64 dtm_idx[2]; @@ -295,38 +347,6 @@ struct arm_cmn_format_attr { int config; }; -static int arm_cmn_xyidbits(const struct arm_cmn *cmn) -{ - return cmn->mesh_x > 4 || cmn->mesh_y > 4 ? 3 : 2; -} - -static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn, - struct arm_cmn_node *dn) -{ - int bits = arm_cmn_xyidbits(cmn); - int x = CMN_NODEID_X(dn->id, bits); - int y = CMN_NODEID_Y(dn->id, bits); - int xp_idx = cmn->mesh_x * y + x; - - dn->to_xp = (cmn->xps + xp_idx) - dn; -} - -static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn) -{ - return dn->type == CMN_TYPE_XP ? dn : dn + dn->to_xp; -} - -static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, - enum cmn_node_type type) -{ - int i; - - for (i = 0; i < cmn->num_dns; i++) - if (cmn->dns[i].type == type) - return &cmn->dns[i]; - return NULL; -} - #define CMN_EVENT_ATTR(_name, _type, _eventid, _occupid) \ (&((struct arm_cmn_event_attr[]) {{ \ .attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL), \ @@ -966,11 +986,10 @@ static int arm_cmn_event_init(struct perf_event *event) } if (!hw->num_dns) { - int bits = arm_cmn_xyidbits(cmn); + struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, nodeid); dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n", - nodeid, CMN_NODEID_X(nodeid, bits), CMN_NODEID_Y(nodeid, bits), - CMN_NODEID_PID(nodeid), CMN_NODEID_DEVID(nodeid), type); + nodeid, nid.x, nid.y, nid.port, nid.dev, type); return -EINVAL; } /* @@ -1068,11 +1087,10 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) dn->wp_event[wp_idx] = dtc_idx; writel_relaxed(cfg, dn->pmu_base + CMN_DTM_WPn_CONFIG(wp_idx)); } else { - unsigned int port = CMN_NODEID_PID(dn->id); - unsigned int dev = CMN_NODEID_DEVID(dn->id); + struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx + - (port << 4) + (dev << 2); + (nid.port << 4) + (nid.dev << 2); if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) { int occupid = CMN_EVENT_OCCUPID(event); From da5f7d2c8019c9dd053e2d94fdc1b3e7c03c35a5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:54 +0000 Subject: [PATCH 05/14] perf/arm-cmn: Streamline node iteration Refactor the places where we scan through the set of nodes to switch from explicit array indexing to pointer-based iteration. This leads to slightly simpler object code, but also makes the source less dense and more pleasant for further development. It also unearths an almost-bug in arm_cmn_event_init() where we've been depending on the "array index" of NULL relative to cmn->dns being a sufficiently large number, yuck. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ee0c9eda9a643f46001ac43aadf3f0b1fd5660dd.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index adf50d613734..da7bf779fec2 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -299,11 +299,11 @@ static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn) static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, enum cmn_node_type type) { - int i; + struct arm_cmn_node *dn; - for (i = 0; i < cmn->num_dns; i++) - if (cmn->dns[i].type == type) - return &cmn->dns[i]; + for (dn = cmn->dns; dn->type; dn++) + if (dn->type == type) + return dn; return NULL; } @@ -941,8 +941,8 @@ static int arm_cmn_event_init(struct perf_event *event) { struct arm_cmn *cmn = to_cmn(event->pmu); struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; enum cmn_node_type type; - unsigned int i; bool bynodeid; u16 nodeid, eventid; @@ -974,10 +974,12 @@ static int arm_cmn_event_init(struct perf_event *event) nodeid = CMN_EVENT_NODEID(event); hw->dn = arm_cmn_node(cmn, type); - for (i = hw->dn - cmn->dns; i < cmn->num_dns && cmn->dns[i].type == type; i++) { + if (!hw->dn) + return -EINVAL; + for (dn = hw->dn; dn->type == type; dn++) { if (!bynodeid) { hw->num_dns++; - } else if (cmn->dns[i].id != nodeid) { + } else if (dn->id != nodeid) { hw->dn++; } else { hw->num_dns = 1; @@ -1332,7 +1334,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); - for (dn = cmn->dns; dn < cmn->dns + cmn->num_dns; dn++) { + for (dn = cmn->dns; dn->type; dn++) { if (dn->type != CMN_TYPE_XP) arm_cmn_init_node_to_xp(cmn, dn); else if (cmn->num_dtcs == 1) @@ -1382,6 +1384,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) u32 xp_offset[CMN_MAX_XPS]; u64 reg; int i, j; + size_t sz; cfg_region = cmn->base + rgn_offset; reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); @@ -1408,14 +1411,13 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg); } - /* Cheeky +1 to help terminate pointer-based iteration */ - cmn->dns = devm_kcalloc(cmn->dev, cmn->num_dns + 1, - sizeof(*cmn->dns), GFP_KERNEL); - if (!cmn->dns) + /* Cheeky +1 to help terminate pointer-based iteration later */ + dn = devm_kcalloc(cmn->dev, cmn->num_dns + 1, sizeof(*dn), GFP_KERNEL); + if (!dn) return -ENOMEM; /* Pass 2: now we can actually populate the nodes */ - dn = cmn->dns; + cmn->dns = dn; for (i = 0; i < cmn->num_xps; i++) { void __iomem *xp_region = cmn->base + xp_offset[i]; struct arm_cmn_node *xp = dn++; @@ -1484,6 +1486,11 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) /* Correct for any nodes we skipped */ cmn->num_dns = dn - cmn->dns; + sz = (void *)(dn + 1) - (void *)cmn->dns; + dn = devm_krealloc(cmn->dev, cmn->dns, sz, GFP_KERNEL); + if (dn) + cmn->dns = dn; + /* * If mesh_x wasn't set during discovery then we never saw * an XP at (0,1), thus we must have an Nx1 configuration. From 0947c80aba23972987a88e620812d17a7af27297 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:55 +0000 Subject: [PATCH 06/14] perf/arm-cmn: Refactor DTM handling Untangle DTMs from XPs into a dedicated abstraction. This helps make things a little more obvious and robust, but primarily paves the way for further development where new IPs can grow extra DTMs per XP. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/9cca18b1b98f482df7f1aaf3d3213e7f39500423.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 169 +++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 82 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index da7bf779fec2..8b98ca9666d0 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -35,14 +35,9 @@ #define CMN_CHILD_NODE_ADDR GENMASK(27, 0) #define CMN_CHILD_NODE_EXTERNAL BIT(31) -#define CMN_ADDR_NODE_PTR GENMASK(27, 14) - -#define CMN_NODE_PTR_DEVID(ptr) (((ptr) >> 2) & 3) -#define CMN_NODE_PTR_PID(ptr) ((ptr) & 1) -#define CMN_NODE_PTR_X(ptr, bits) ((ptr) >> (6 + (bits))) -#define CMN_NODE_PTR_Y(ptr, bits) (((ptr) >> 6) & ((1U << (bits)) - 1)) - -#define CMN_MAX_XPS (8 * 8) +#define CMN_MAX_DIMENSION 8 +#define CMN_MAX_XPS (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION) +#define CMN_MAX_DTMS CMN_MAX_XPS /* The CFG node has one other useful purpose */ #define CMN_CFGM_PERIPH_ID_2 0x0010 @@ -190,32 +185,32 @@ struct arm_cmn_node { u16 id, logid; enum cmn_node_type type; + int dtm; union { - /* Device node */ + /* DN/HN-F/CXHA */ struct { - int to_xp; - /* DN/HN-F/CXHA */ - unsigned int occupid_val; - unsigned int occupid_count; + u8 occupid_val; + u8 occupid_count; }; /* XP */ - struct { - int dtc; - u32 pmu_config_low; - union { - u8 input_sel[4]; - __le32 pmu_config_high; - }; - s8 wp_event[4]; - }; + int dtc; }; - union { u8 event[4]; __le32 event_sel; }; }; +struct arm_cmn_dtm { + void __iomem *base; + u32 pmu_config_low; + union { + u8 input_sel[4]; + __le32 pmu_config_high; + }; + s8 wp_event[4]; +}; + struct arm_cmn_dtc { void __iomem *base; int irq; @@ -241,6 +236,7 @@ struct arm_cmn { struct arm_cmn_node *xps; struct arm_cmn_node *dns; + struct arm_cmn_dtm *dtms; struct arm_cmn_dtc *dtc; unsigned int num_dtcs; @@ -282,20 +278,14 @@ static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id) return nid; } -static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn, - struct arm_cmn_node *dn) +static struct arm_cmn_node *arm_cmn_node_to_xp(const struct arm_cmn *cmn, + const struct arm_cmn_node *dn) { struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); int xp_idx = cmn->mesh_x * nid.y + nid.x; - dn->to_xp = (cmn->xps + xp_idx) - dn; + return cmn->xps + xp_idx; } - -static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn) -{ - return dn->type == CMN_TYPE_XP ? dn : dn + dn->to_xp; -} - static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, enum cmn_node_type type) { @@ -706,9 +696,9 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw, offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT; for_each_hw_dn(hw, dn, i) { - struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); + struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm]; int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); - u64 reg = readq_relaxed(xp->pmu_base + offset); + u64 reg = readq_relaxed(dtm->base + offset); u16 dtm_count = reg >> (dtm_idx * 16); count += dtm_count; @@ -835,9 +825,9 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags) } struct arm_cmn_val { - u8 dtm_count[CMN_MAX_XPS]; - u8 occupid[CMN_MAX_XPS]; - u8 wp[CMN_MAX_XPS][4]; + u8 dtm_count[CMN_MAX_DTMS]; + u8 occupid[CMN_MAX_DTMS]; + u8 wp[CMN_MAX_DTMS][4]; int dtc_count; bool cycles; }; @@ -866,16 +856,16 @@ static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *ev occupid = 0; for_each_hw_dn(hw, dn, i) { - int wp_idx, xp = arm_cmn_node_to_xp(dn)->logid; + int wp_idx, dtm = dn->dtm; - val->dtm_count[xp]++; - val->occupid[xp] = occupid; + val->dtm_count[dtm]++; + val->occupid[dtm] = occupid; if (type != CMN_TYPE_WP) continue; wp_idx = arm_cmn_wp_idx(event); - val->wp[xp][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1; + val->wp[dtm][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1; } } @@ -914,22 +904,22 @@ static int arm_cmn_validate_group(struct perf_event *event) occupid = 0; for_each_hw_dn(hw, dn, i) { - int wp_idx, wp_cmb, xp = arm_cmn_node_to_xp(dn)->logid; + int wp_idx, wp_cmb, dtm = dn->dtm; - if (val.dtm_count[xp] == CMN_DTM_NUM_COUNTERS) + if (val.dtm_count[dtm] == CMN_DTM_NUM_COUNTERS) return -EINVAL; - if (occupid && val.occupid[xp] && occupid != val.occupid[xp]) + if (occupid && val.occupid[dtm] && occupid != val.occupid[dtm]) return -EINVAL; if (type != CMN_TYPE_WP) continue; wp_idx = arm_cmn_wp_idx(event); - if (val.wp[xp][wp_idx]) + if (val.wp[dtm][wp_idx]) return -EINVAL; - wp_cmb = val.wp[xp][wp_idx ^ 1]; + wp_cmb = val.wp[dtm][wp_idx ^ 1]; if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1) return -EINVAL; } @@ -1010,17 +1000,17 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, enum cmn_node_type type = CMN_EVENT_TYPE(event); while (i--) { - struct arm_cmn_node *xp = arm_cmn_node_to_xp(hw->dn + i); + struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm]; unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); if (type == CMN_TYPE_WP) - hw->dn[i].wp_event[arm_cmn_wp_idx(event)] = -1; + dtm->wp_event[arm_cmn_wp_idx(event)] = -1; if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) hw->dn[i].occupid_count--; - xp->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx); - writel_relaxed(xp->pmu_config_low, xp->pmu_base + CMN_DTM_PMU_CONFIG); + dtm->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx); + writel_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG); } memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); @@ -1062,12 +1052,12 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) /* ...then the local counters to feed it. */ for_each_hw_dn(hw, dn, i) { - struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); + struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm]; unsigned int dtm_idx, shift; u64 reg; dtm_idx = 0; - while (xp->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx)) + while (dtm->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx)) if (++dtm_idx == CMN_DTM_NUM_COUNTERS) goto free_dtms; @@ -1077,17 +1067,17 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) int tmp, wp_idx = arm_cmn_wp_idx(event); u32 cfg = arm_cmn_wp_config(event); - if (dn->wp_event[wp_idx] >= 0) + if (dtm->wp_event[wp_idx] >= 0) goto free_dtms; - tmp = dn->wp_event[wp_idx ^ 1]; + tmp = dtm->wp_event[wp_idx ^ 1]; if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != CMN_EVENT_WP_COMBINE(dtc->counters[tmp])) goto free_dtms; input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; - dn->wp_event[wp_idx] = dtc_idx; - writel_relaxed(cfg, dn->pmu_base + CMN_DTM_WPn_CONFIG(wp_idx)); + dtm->wp_event[wp_idx] = dtc_idx; + writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx)); } else { struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); @@ -1095,7 +1085,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) (nid.port << 4) + (nid.dev << 2); if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) { - int occupid = CMN_EVENT_OCCUPID(event); + u8 occupid = CMN_EVENT_OCCUPID(event); if (dn->occupid_count == 0) { dn->occupid_val = occupid; @@ -1110,13 +1100,13 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) arm_cmn_set_index(hw->dtm_idx, i, dtm_idx); - xp->input_sel[dtm_idx] = input_sel; + dtm->input_sel[dtm_idx] = input_sel; shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx); - xp->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift); - xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift; - xp->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx); - reg = (u64)le32_to_cpu(xp->pmu_config_high) << 32 | xp->pmu_config_low; - writeq_relaxed(reg, xp->pmu_base + CMN_DTM_PMU_CONFIG); + dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift); + dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift; + dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx); + reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low; + writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG); } /* Go go go! */ @@ -1276,23 +1266,22 @@ static int arm_cmn_init_irqs(struct arm_cmn *cmn) return 0; } -static void arm_cmn_init_dtm(struct arm_cmn_node *xp) +static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp) { int i; + dtm->base = xp->pmu_base; + dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN; for (i = 0; i < 4; i++) { - xp->wp_event[i] = -1; - writeq_relaxed(0, xp->pmu_base + CMN_DTM_WPn_MASK(i)); - writeq_relaxed(~0ULL, xp->pmu_base + CMN_DTM_WPn_VAL(i)); + dtm->wp_event[i] = -1; + writeq_relaxed(0, dtm->base + CMN_DTM_WPn_MASK(i)); + writeq_relaxed(~0ULL, dtm->base + CMN_DTM_WPn_VAL(i)); } - xp->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN; - xp->dtc = -1; } static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx) { struct arm_cmn_dtc *dtc = cmn->dtc + idx; - struct arm_cmn_node *xp; dtc->base = dn->pmu_base - CMN_PMU_OFFSET; dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx); @@ -1303,10 +1292,6 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); - /* We do at least know that a DTC's XP must be in that DTC's domain */ - xp = arm_cmn_node_to_xp(dn); - xp->dtc = idx; - return 0; } @@ -1323,7 +1308,7 @@ static int arm_cmn_node_cmp(const void *a, const void *b) static int arm_cmn_init_dtcs(struct arm_cmn *cmn) { - struct arm_cmn_node *dn; + struct arm_cmn_node *dn, *xp; int dtc_idx = 0; cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL); @@ -1335,13 +1320,24 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); for (dn = cmn->dns; dn->type; dn++) { - if (dn->type != CMN_TYPE_XP) - arm_cmn_init_node_to_xp(cmn, dn); - else if (cmn->num_dtcs == 1) - dn->dtc = 0; + if (dn->type == CMN_TYPE_XP) { + if (dn->dtc < 0 && cmn->num_dtcs == 1) + dn->dtc = 0; + continue; + } - if (dn->type == CMN_TYPE_DTC) - arm_cmn_init_dtc(cmn, dn, dtc_idx++); + xp = arm_cmn_node_to_xp(cmn, dn); + dn->dtm = xp->dtm; + + if (dn->type == CMN_TYPE_DTC) { + int err; + /* We do at least know that a DTC's XP must be in that DTC's domain */ + if (xp->dtc < 0) + xp->dtc = dtc_idx; + err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); + if (err) + return err; + } /* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */ if (dn->type == CMN_TYPE_RND) @@ -1380,6 +1376,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) { void __iomem *cfg_region; struct arm_cmn_node cfg, *dn; + struct arm_cmn_dtm *dtm; u16 child_count, child_poff; u32 xp_offset[CMN_MAX_XPS]; u64 reg; @@ -1416,14 +1413,18 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (!dn) return -ENOMEM; + dtm = devm_kcalloc(cmn->dev, cmn->num_xps, sizeof(*dtm), GFP_KERNEL); + if (!dtm) + return -ENOMEM; + /* Pass 2: now we can actually populate the nodes */ cmn->dns = dn; + cmn->dtms = dtm; for (i = 0; i < cmn->num_xps; i++) { void __iomem *xp_region = cmn->base + xp_offset[i]; struct arm_cmn_node *xp = dn++; arm_cmn_init_node_info(cmn, xp_offset[i], xp); - arm_cmn_init_dtm(xp); /* * Thanks to the order in which XP logical IDs seem to be * assigned, we can handily infer the mesh X dimension by @@ -1433,6 +1434,10 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (xp->id == (1 << 3)) cmn->mesh_x = xp->logid; + xp->dtc = -1; + xp->dtm = dtm - cmn->dtms; + arm_cmn_init_dtm(dtm++, xp); + reg = readq_relaxed(xp_region + CMN_CHILD_INFO); child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); From 847eef94e6327dd7690bfac0bd3a81a7ba6aa1ee Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:56 +0000 Subject: [PATCH 07/14] perf/arm-cmn: Optimise DTM counter reads When multiple nodes of the same type are connected to the same XP (particularly in CAL configurations), it seems that they are likely to be consecutive in logical ID. Therefore, we're likely to gain a small benefit from an easy tweak to optimise out consecutive reads of the same set of DTM counters for an aggregated event. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/7777d77c2df17693cd3dabb6e268906e15238d82.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 8b98ca9666d0..005a0d83bcac 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -690,18 +690,19 @@ static void arm_cmn_pmu_disable(struct pmu *pmu) static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw, bool snapshot) { + struct arm_cmn_dtm *dtm = NULL; struct arm_cmn_node *dn; - unsigned int i, offset; - u64 count = 0; + unsigned int i, offset, dtm_idx; + u64 reg, count = 0; offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT; for_each_hw_dn(hw, dn, i) { - struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm]; - int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); - u64 reg = readq_relaxed(dtm->base + offset); - u16 dtm_count = reg >> (dtm_idx * 16); - - count += dtm_count; + if (dtm != &cmn->dtms[dn->dtm]) { + dtm = &cmn->dtms[dn->dtm]; + reg = readq_relaxed(dtm->base + offset); + } + dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + count += (u16)(reg >> (dtm_idx * 16)); } return count; } From 4f2c3872dde55090bf39e1f12a8517a32b6cd048 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:57 +0000 Subject: [PATCH 08/14] perf/arm-cmn: Optimise DTC counter accesses In cases where we do know which DTC domain a node belongs to, we can skip initialising or reading the global count in DTCs where we know it won't change. The machinery to achieve that is mostly in place already, so finish hooking it up by converting the vestigial domain tracking to propagate suitable bitmaps all the way through to events. Note that this does not allow allocating such an unused counter to a different event on that DTC, because that is a flippin' nightmare. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/51d930fd945ef51c81f5889ccca055c302b0a1d0.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 005a0d83bcac..acff8683af2c 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -193,7 +193,7 @@ struct arm_cmn_node { u8 occupid_count; }; /* XP */ - int dtc; + u8 dtc; }; union { u8 event[4]; @@ -968,14 +968,14 @@ static int arm_cmn_event_init(struct perf_event *event) if (!hw->dn) return -EINVAL; for (dn = hw->dn; dn->type == type; dn++) { - if (!bynodeid) { - hw->num_dns++; - } else if (dn->id != nodeid) { + if (bynodeid && dn->id != nodeid) { hw->dn++; - } else { - hw->num_dns = 1; - break; + continue; } + hw->dtcs_used |= arm_cmn_node_to_xp(cmn, dn)->dtc; + hw->num_dns++; + if (bynodeid) + break; } if (!hw->num_dns) { @@ -985,11 +985,6 @@ static int arm_cmn_event_init(struct perf_event *event) nodeid, nid.x, nid.y, nid.port, nid.dev, type); return -EINVAL; } - /* - * By assuming events count in all DTC domains, we cunningly avoid - * needing to know anything about how XPs are assigned to domains. - */ - hw->dtcs_used = (1U << cmn->num_dtcs) - 1; return arm_cmn_validate_group(event); } @@ -1311,6 +1306,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) { struct arm_cmn_node *dn, *xp; int dtc_idx = 0; + u8 dtcs_present = (1 << cmn->num_dtcs) - 1; cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL); if (!cmn->dtc) @@ -1322,8 +1318,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) for (dn = cmn->dns; dn->type; dn++) { if (dn->type == CMN_TYPE_XP) { - if (dn->dtc < 0 && cmn->num_dtcs == 1) - dn->dtc = 0; + dn->dtc &= dtcs_present; continue; } @@ -1333,8 +1328,8 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) if (dn->type == CMN_TYPE_DTC) { int err; /* We do at least know that a DTC's XP must be in that DTC's domain */ - if (xp->dtc < 0) - xp->dtc = dtc_idx; + if (xp->dtc == 0xf) + xp->dtc = 1 << dtc_idx; err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); if (err) return err; @@ -1435,7 +1430,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (xp->id == (1 << 3)) cmn->mesh_x = xp->logid; - xp->dtc = -1; + xp->dtc = 0xf; xp->dtm = dtm - cmn->dtms; arm_cmn_init_dtm(dtm++, xp); From 558a07807038017255005a4820f600da643d8a5f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:58 +0000 Subject: [PATCH 09/14] perf/arm-cmn: Move group validation data off-stack With the value of CMN_MAX_DTMS increasing significantly, our validation data structure is set to get quite big. Technically we could pack it at least twice as densely, since we only need around 19 bits of information per DTM, but that makes the code even more mind-bogglingly impenetrable, and even half of "quite big" may still be uncomfortably large for a stack frame (~1KB). Just move it to an off-stack allocation instead. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0cabff2e5839ddc0979e757c55515966f65359e4.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 43 ++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index acff8683af2c..d2dd02f040b8 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -876,8 +876,8 @@ static int arm_cmn_validate_group(struct perf_event *event) struct arm_cmn_node *dn; struct perf_event *sibling, *leader = event->group_leader; enum cmn_node_type type; - struct arm_cmn_val val; - int i; + struct arm_cmn_val *val; + int i, ret = -EINVAL; u8 occupid; if (leader == event) @@ -886,18 +886,22 @@ static int arm_cmn_validate_group(struct perf_event *event) if (event->pmu != leader->pmu && !is_software_event(leader)) return -EINVAL; - memset(&val, 0, sizeof(val)); + val = kzalloc(sizeof(*val), GFP_KERNEL); + if (!val) + return -ENOMEM; - arm_cmn_val_add_event(&val, leader); + arm_cmn_val_add_event(val, leader); for_each_sibling_event(sibling, leader) - arm_cmn_val_add_event(&val, sibling); + arm_cmn_val_add_event(val, sibling); type = CMN_EVENT_TYPE(event); - if (type == CMN_TYPE_DTC) - return val.cycles ? -EINVAL : 0; + if (type == CMN_TYPE_DTC) { + ret = val->cycles ? -EINVAL : 0; + goto done; + } - if (val.dtc_count == CMN_DT_NUM_COUNTERS) - return -EINVAL; + if (val->dtc_count == CMN_DT_NUM_COUNTERS) + goto done; if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) occupid = CMN_EVENT_OCCUPID(event) + 1; @@ -907,25 +911,28 @@ static int arm_cmn_validate_group(struct perf_event *event) for_each_hw_dn(hw, dn, i) { int wp_idx, wp_cmb, dtm = dn->dtm; - if (val.dtm_count[dtm] == CMN_DTM_NUM_COUNTERS) - return -EINVAL; + if (val->dtm_count[dtm] == CMN_DTM_NUM_COUNTERS) + goto done; - if (occupid && val.occupid[dtm] && occupid != val.occupid[dtm]) - return -EINVAL; + if (occupid && val->occupid[dtm] && occupid != val->occupid[dtm]) + goto done; if (type != CMN_TYPE_WP) continue; wp_idx = arm_cmn_wp_idx(event); - if (val.wp[dtm][wp_idx]) - return -EINVAL; + if (val->wp[dtm][wp_idx]) + goto done; - wp_cmb = val.wp[dtm][wp_idx ^ 1]; + wp_cmb = val->wp[dtm][wp_idx ^ 1]; if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1) - return -EINVAL; + goto done; } - return 0; + ret = 0; +done: + kfree(val); + return ret; } static int arm_cmn_event_init(struct perf_event *event) From 61ec1d875812046ff9d473183d53e19dcd6b2ada Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:44:59 +0000 Subject: [PATCH 10/14] perf/arm-cmn: Demarcate CMN-600 specifics In preparation for supporting newer CMN products, let's introduce a means to differentiate the features and events which are specific to a particular IP from those which remain common to the whole family. The newer designs have also smoothed off some of the rough edges in terms of discoverability, so separate out the parts of the flow which have effectively now become CMN-600 quirks. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/9f6368cdca4c821d801138939508a5bba54ccabb.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 301 +++++++++++++++++++++-------------------- 1 file changed, 156 insertions(+), 145 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index d2dd02f040b8..ce94f923a607 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -151,7 +151,12 @@ #define CMN_WP_DOWN 2 -/* r0px probably don't exist in silicon, thankfully */ +enum cmn_model { + CMN_ANY = -1, + CMN600 = 1, +}; + +/* CMN-600 r0px shouldn't exist in silicon, thankfully */ enum cmn_revision { CMN600_R1P0, CMN600_R1P1, @@ -159,6 +164,7 @@ enum cmn_revision { CMN600_R1P3, CMN600_R2P0, CMN600_R3P0, + CMN600_R3P1, }; enum cmn_node_type { @@ -229,6 +235,7 @@ struct arm_cmn { void __iomem *base; enum cmn_revision rev; + enum cmn_model model; u8 mesh_x; u8 mesh_y; u16 num_xps; @@ -326,6 +333,7 @@ static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos) struct arm_cmn_event_attr { struct device_attribute attr; + enum cmn_model model; enum cmn_node_type type; u8 eventid; u8 occupid; @@ -337,9 +345,10 @@ struct arm_cmn_format_attr { int config; }; -#define CMN_EVENT_ATTR(_name, _type, _eventid, _occupid) \ +#define CMN_EVENT_ATTR(_model, _name, _type, _eventid, _occupid) \ (&((struct arm_cmn_event_attr[]) {{ \ .attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL), \ + .model = _model, \ .type = _type, \ .eventid = _eventid, \ .occupid = _occupid, \ @@ -386,12 +395,15 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, eattr = container_of(attr, typeof(*eattr), attr.attr); type = eattr->type; + if (!(eattr->model & cmn->model)) + return 0; + /* Watchpoints aren't nodes */ if (type == CMN_TYPE_WP) type = CMN_TYPE_XP; /* Revision-specific differences */ - if (cmn->rev < CMN600_R1P2) { + if (cmn->model == CMN600 && cmn->rev < CMN600_R1P2) { if (type == CMN_TYPE_HNF && eattr->eventid == 0x1b) return 0; } @@ -402,25 +414,27 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, return attr->mode; } -#define _CMN_EVENT_DVM(_name, _event, _occup) \ - CMN_EVENT_ATTR(dn_##_name, CMN_TYPE_DVM, _event, _occup) +#define _CMN_EVENT_DVM(_model, _name, _event, _occup) \ + CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup) #define CMN_EVENT_DTC(_name) \ - CMN_EVENT_ATTR(dtc_##_name, CMN_TYPE_DTC, 0, 0) -#define _CMN_EVENT_HNF(_name, _event, _occup) \ - CMN_EVENT_ATTR(hnf_##_name, CMN_TYPE_HNF, _event, _occup) + CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0, 0) +#define _CMN_EVENT_HNF(_model, _name, _event, _occup) \ + CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup) #define CMN_EVENT_HNI(_name, _event) \ - CMN_EVENT_ATTR(hni_##_name, CMN_TYPE_HNI, _event, 0) + CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event, 0) #define __CMN_EVENT_XP(_name, _event) \ - CMN_EVENT_ATTR(mxp_##_name, CMN_TYPE_XP, _event, 0) -#define CMN_EVENT_SBSX(_name, _event) \ - CMN_EVENT_ATTR(sbsx_##_name, CMN_TYPE_SBSX, _event, 0) -#define CMN_EVENT_RNID(_name, _event) \ - CMN_EVENT_ATTR(rnid_##_name, CMN_TYPE_RNI, _event, 0) + CMN_EVENT_ATTR(CMN_ANY, mxp_##_name, CMN_TYPE_XP, _event, 0) +#define CMN_EVENT_SBSX(_model, _name, _event) \ + CMN_EVENT_ATTR(_model, sbsx_##_name, CMN_TYPE_SBSX, _event, 0) +#define CMN_EVENT_RNID(_model, _name, _event) \ + CMN_EVENT_ATTR(_model, rnid_##_name, CMN_TYPE_RNI, _event, 0) +#define CMN_EVENT_MTSX(_name, _event) \ + CMN_EVENT_ATTR(CMN_ANY, mtsx_##_name, CMN_TYPE_MTSX, _event, 0) -#define CMN_EVENT_DVM(_name, _event) \ - _CMN_EVENT_DVM(_name, _event, 0) -#define CMN_EVENT_HNF(_name, _event) \ - _CMN_EVENT_HNF(_name, _event, 0) +#define CMN_EVENT_DVM(_model, _name, _event) \ + _CMN_EVENT_DVM(_model, _name, _event, 0) +#define CMN_EVENT_HNF(_model, _name, _event) \ + _CMN_EVENT_HNF(_model, _name, _event, 0) #define _CMN_EVENT_XP(_name, _event) \ __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \ @@ -445,115 +459,115 @@ static struct attribute *arm_cmn_event_attrs[] = { * slot, but our lazy short-cut of using the DTM counter index for * the PMU index as well happens to avoid that by construction. */ - CMN_EVENT_DVM(rxreq_dvmop, 0x01), - CMN_EVENT_DVM(rxreq_dvmsync, 0x02), - CMN_EVENT_DVM(rxreq_dvmop_vmid_filtered, 0x03), - CMN_EVENT_DVM(rxreq_retried, 0x04), - _CMN_EVENT_DVM(rxreq_trk_occupancy_all, 0x05, 0), - _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmop, 0x05, 1), - _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmsync, 0x05, 2), + CMN_EVENT_DVM(CMN600, rxreq_dvmop, 0x01), + CMN_EVENT_DVM(CMN600, rxreq_dvmsync, 0x02), + CMN_EVENT_DVM(CMN600, rxreq_dvmop_vmid_filtered, 0x03), + CMN_EVENT_DVM(CMN600, rxreq_retried, 0x04), + _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_all, 0x05, 0), + _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmop, 0x05, 1), + _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmsync, 0x05, 2), - CMN_EVENT_HNF(cache_miss, 0x01), - CMN_EVENT_HNF(slc_sf_cache_access, 0x02), - CMN_EVENT_HNF(cache_fill, 0x03), - CMN_EVENT_HNF(pocq_retry, 0x04), - CMN_EVENT_HNF(pocq_reqs_recvd, 0x05), - CMN_EVENT_HNF(sf_hit, 0x06), - CMN_EVENT_HNF(sf_evictions, 0x07), - CMN_EVENT_HNF(dir_snoops_sent, 0x08), - CMN_EVENT_HNF(brd_snoops_sent, 0x09), - CMN_EVENT_HNF(slc_eviction, 0x0a), - CMN_EVENT_HNF(slc_fill_invalid_way, 0x0b), - CMN_EVENT_HNF(mc_retries, 0x0c), - CMN_EVENT_HNF(mc_reqs, 0x0d), - CMN_EVENT_HNF(qos_hh_retry, 0x0e), - _CMN_EVENT_HNF(qos_pocq_occupancy_all, 0x0f, 0), - _CMN_EVENT_HNF(qos_pocq_occupancy_read, 0x0f, 1), - _CMN_EVENT_HNF(qos_pocq_occupancy_write, 0x0f, 2), - _CMN_EVENT_HNF(qos_pocq_occupancy_atomic, 0x0f, 3), - _CMN_EVENT_HNF(qos_pocq_occupancy_stash, 0x0f, 4), - CMN_EVENT_HNF(pocq_addrhaz, 0x10), - CMN_EVENT_HNF(pocq_atomic_addrhaz, 0x11), - CMN_EVENT_HNF(ld_st_swp_adq_full, 0x12), - CMN_EVENT_HNF(cmp_adq_full, 0x13), - CMN_EVENT_HNF(txdat_stall, 0x14), - CMN_EVENT_HNF(txrsp_stall, 0x15), - CMN_EVENT_HNF(seq_full, 0x16), - CMN_EVENT_HNF(seq_hit, 0x17), - CMN_EVENT_HNF(snp_sent, 0x18), - CMN_EVENT_HNF(sfbi_dir_snp_sent, 0x19), - CMN_EVENT_HNF(sfbi_brd_snp_sent, 0x1a), - CMN_EVENT_HNF(snp_sent_untrk, 0x1b), - CMN_EVENT_HNF(intv_dirty, 0x1c), - CMN_EVENT_HNF(stash_snp_sent, 0x1d), - CMN_EVENT_HNF(stash_data_pull, 0x1e), - CMN_EVENT_HNF(snp_fwded, 0x1f), + CMN_EVENT_HNF(CMN_ANY, cache_miss, 0x01), + CMN_EVENT_HNF(CMN_ANY, slc_sf_cache_access, 0x02), + CMN_EVENT_HNF(CMN_ANY, cache_fill, 0x03), + CMN_EVENT_HNF(CMN_ANY, pocq_retry, 0x04), + CMN_EVENT_HNF(CMN_ANY, pocq_reqs_recvd, 0x05), + CMN_EVENT_HNF(CMN_ANY, sf_hit, 0x06), + CMN_EVENT_HNF(CMN_ANY, sf_evictions, 0x07), + CMN_EVENT_HNF(CMN_ANY, dir_snoops_sent, 0x08), + CMN_EVENT_HNF(CMN_ANY, brd_snoops_sent, 0x09), + CMN_EVENT_HNF(CMN_ANY, slc_eviction, 0x0a), + CMN_EVENT_HNF(CMN_ANY, slc_fill_invalid_way, 0x0b), + CMN_EVENT_HNF(CMN_ANY, mc_retries, 0x0c), + CMN_EVENT_HNF(CMN_ANY, mc_reqs, 0x0d), + CMN_EVENT_HNF(CMN_ANY, qos_hh_retry, 0x0e), + _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0), + _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1), + _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2), + _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3), + _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4), + CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz, 0x10), + CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz, 0x11), + CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full, 0x12), + CMN_EVENT_HNF(CMN_ANY, cmp_adq_full, 0x13), + CMN_EVENT_HNF(CMN_ANY, txdat_stall, 0x14), + CMN_EVENT_HNF(CMN_ANY, txrsp_stall, 0x15), + CMN_EVENT_HNF(CMN_ANY, seq_full, 0x16), + CMN_EVENT_HNF(CMN_ANY, seq_hit, 0x17), + CMN_EVENT_HNF(CMN_ANY, snp_sent, 0x18), + CMN_EVENT_HNF(CMN_ANY, sfbi_dir_snp_sent, 0x19), + CMN_EVENT_HNF(CMN_ANY, sfbi_brd_snp_sent, 0x1a), + CMN_EVENT_HNF(CMN_ANY, snp_sent_untrk, 0x1b), + CMN_EVENT_HNF(CMN_ANY, intv_dirty, 0x1c), + CMN_EVENT_HNF(CMN_ANY, stash_snp_sent, 0x1d), + CMN_EVENT_HNF(CMN_ANY, stash_data_pull, 0x1e), + CMN_EVENT_HNF(CMN_ANY, snp_fwded, 0x1f), - CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20), - CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21), - CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22), - CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23), - CMN_EVENT_HNI(wdb_occ_cnt_ovfl, 0x24), - CMN_EVENT_HNI(rrt_rd_alloc, 0x25), - CMN_EVENT_HNI(rrt_wr_alloc, 0x26), - CMN_EVENT_HNI(rdt_rd_alloc, 0x27), - CMN_EVENT_HNI(rdt_wr_alloc, 0x28), - CMN_EVENT_HNI(wdb_alloc, 0x29), - CMN_EVENT_HNI(txrsp_retryack, 0x2a), - CMN_EVENT_HNI(arvalid_no_arready, 0x2b), - CMN_EVENT_HNI(arready_no_arvalid, 0x2c), - CMN_EVENT_HNI(awvalid_no_awready, 0x2d), - CMN_EVENT_HNI(awready_no_awvalid, 0x2e), - CMN_EVENT_HNI(wvalid_no_wready, 0x2f), - CMN_EVENT_HNI(txdat_stall, 0x30), - CMN_EVENT_HNI(nonpcie_serialization, 0x31), - CMN_EVENT_HNI(pcie_serialization, 0x32), + CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20), + CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21), + CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22), + CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23), + CMN_EVENT_HNI(wdb_occ_cnt_ovfl, 0x24), + CMN_EVENT_HNI(rrt_rd_alloc, 0x25), + CMN_EVENT_HNI(rrt_wr_alloc, 0x26), + CMN_EVENT_HNI(rdt_rd_alloc, 0x27), + CMN_EVENT_HNI(rdt_wr_alloc, 0x28), + CMN_EVENT_HNI(wdb_alloc, 0x29), + CMN_EVENT_HNI(txrsp_retryack, 0x2a), + CMN_EVENT_HNI(arvalid_no_arready, 0x2b), + CMN_EVENT_HNI(arready_no_arvalid, 0x2c), + CMN_EVENT_HNI(awvalid_no_awready, 0x2d), + CMN_EVENT_HNI(awready_no_awvalid, 0x2e), + CMN_EVENT_HNI(wvalid_no_wready, 0x2f), + CMN_EVENT_HNI(txdat_stall, 0x30), + CMN_EVENT_HNI(nonpcie_serialization, 0x31), + CMN_EVENT_HNI(pcie_serialization, 0x32), - CMN_EVENT_XP(txflit_valid, 0x01), - CMN_EVENT_XP(txflit_stall, 0x02), - CMN_EVENT_XP(partial_dat_flit, 0x03), + CMN_EVENT_XP(txflit_valid, 0x01), + CMN_EVENT_XP(txflit_stall, 0x02), + CMN_EVENT_XP(partial_dat_flit, 0x03), /* We treat watchpoints as a special made-up class of XP events */ - CMN_EVENT_ATTR(watchpoint_up, CMN_TYPE_WP, 0, 0), - CMN_EVENT_ATTR(watchpoint_down, CMN_TYPE_WP, 2, 0), + CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP, 0), + CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN, 0), - CMN_EVENT_SBSX(rd_req, 0x01), - CMN_EVENT_SBSX(wr_req, 0x02), - CMN_EVENT_SBSX(cmo_req, 0x03), - CMN_EVENT_SBSX(txrsp_retryack, 0x04), - CMN_EVENT_SBSX(txdat_flitv, 0x05), - CMN_EVENT_SBSX(txrsp_flitv, 0x06), - CMN_EVENT_SBSX(rd_req_trkr_occ_cnt_ovfl, 0x11), - CMN_EVENT_SBSX(wr_req_trkr_occ_cnt_ovfl, 0x12), - CMN_EVENT_SBSX(cmo_req_trkr_occ_cnt_ovfl, 0x13), - CMN_EVENT_SBSX(wdb_occ_cnt_ovfl, 0x14), - CMN_EVENT_SBSX(rd_axi_trkr_occ_cnt_ovfl, 0x15), - CMN_EVENT_SBSX(cmo_axi_trkr_occ_cnt_ovfl, 0x16), - CMN_EVENT_SBSX(arvalid_no_arready, 0x21), - CMN_EVENT_SBSX(awvalid_no_awready, 0x22), - CMN_EVENT_SBSX(wvalid_no_wready, 0x23), - CMN_EVENT_SBSX(txdat_stall, 0x24), - CMN_EVENT_SBSX(txrsp_stall, 0x25), + CMN_EVENT_SBSX(CMN_ANY, rd_req, 0x01), + CMN_EVENT_SBSX(CMN_ANY, wr_req, 0x02), + CMN_EVENT_SBSX(CMN_ANY, cmo_req, 0x03), + CMN_EVENT_SBSX(CMN_ANY, txrsp_retryack, 0x04), + CMN_EVENT_SBSX(CMN_ANY, txdat_flitv, 0x05), + CMN_EVENT_SBSX(CMN_ANY, txrsp_flitv, 0x06), + CMN_EVENT_SBSX(CMN_ANY, rd_req_trkr_occ_cnt_ovfl, 0x11), + CMN_EVENT_SBSX(CMN_ANY, wr_req_trkr_occ_cnt_ovfl, 0x12), + CMN_EVENT_SBSX(CMN_ANY, cmo_req_trkr_occ_cnt_ovfl, 0x13), + CMN_EVENT_SBSX(CMN_ANY, wdb_occ_cnt_ovfl, 0x14), + CMN_EVENT_SBSX(CMN_ANY, rd_axi_trkr_occ_cnt_ovfl, 0x15), + CMN_EVENT_SBSX(CMN_ANY, cmo_axi_trkr_occ_cnt_ovfl, 0x16), + CMN_EVENT_SBSX(CMN_ANY, arvalid_no_arready, 0x21), + CMN_EVENT_SBSX(CMN_ANY, awvalid_no_awready, 0x22), + CMN_EVENT_SBSX(CMN_ANY, wvalid_no_wready, 0x23), + CMN_EVENT_SBSX(CMN_ANY, txdat_stall, 0x24), + CMN_EVENT_SBSX(CMN_ANY, txrsp_stall, 0x25), - CMN_EVENT_RNID(s0_rdata_beats, 0x01), - CMN_EVENT_RNID(s1_rdata_beats, 0x02), - CMN_EVENT_RNID(s2_rdata_beats, 0x03), - CMN_EVENT_RNID(rxdat_flits, 0x04), - CMN_EVENT_RNID(txdat_flits, 0x05), - CMN_EVENT_RNID(txreq_flits_total, 0x06), - CMN_EVENT_RNID(txreq_flits_retried, 0x07), - CMN_EVENT_RNID(rrt_occ_ovfl, 0x08), - CMN_EVENT_RNID(wrt_occ_ovfl, 0x09), - CMN_EVENT_RNID(txreq_flits_replayed, 0x0a), - CMN_EVENT_RNID(wrcancel_sent, 0x0b), - CMN_EVENT_RNID(s0_wdata_beats, 0x0c), - CMN_EVENT_RNID(s1_wdata_beats, 0x0d), - CMN_EVENT_RNID(s2_wdata_beats, 0x0e), - CMN_EVENT_RNID(rrt_alloc, 0x0f), - CMN_EVENT_RNID(wrt_alloc, 0x10), - CMN_EVENT_RNID(rdb_unord, 0x11), - CMN_EVENT_RNID(rdb_replay, 0x12), - CMN_EVENT_RNID(rdb_hybrid, 0x13), - CMN_EVENT_RNID(rdb_ord, 0x14), + CMN_EVENT_RNID(CMN_ANY, s0_rdata_beats, 0x01), + CMN_EVENT_RNID(CMN_ANY, s1_rdata_beats, 0x02), + CMN_EVENT_RNID(CMN_ANY, s2_rdata_beats, 0x03), + CMN_EVENT_RNID(CMN_ANY, rxdat_flits, 0x04), + CMN_EVENT_RNID(CMN_ANY, txdat_flits, 0x05), + CMN_EVENT_RNID(CMN_ANY, txreq_flits_total, 0x06), + CMN_EVENT_RNID(CMN_ANY, txreq_flits_retried, 0x07), + CMN_EVENT_RNID(CMN_ANY, rrt_occ_ovfl, 0x08), + CMN_EVENT_RNID(CMN_ANY, wrt_occ_ovfl, 0x09), + CMN_EVENT_RNID(CMN_ANY, txreq_flits_replayed, 0x0a), + CMN_EVENT_RNID(CMN_ANY, wrcancel_sent, 0x0b), + CMN_EVENT_RNID(CMN_ANY, s0_wdata_beats, 0x0c), + CMN_EVENT_RNID(CMN_ANY, s1_wdata_beats, 0x0d), + CMN_EVENT_RNID(CMN_ANY, s2_wdata_beats, 0x0e), + CMN_EVENT_RNID(CMN_ANY, rrt_alloc, 0x0f), + CMN_EVENT_RNID(CMN_ANY, wrt_alloc, 0x10), + CMN_EVENT_RNID(CMN600, rdb_unord, 0x11), + CMN_EVENT_RNID(CMN600, rdb_replay, 0x12), + CMN_EVENT_RNID(CMN600, rdb_hybrid, 0x13), + CMN_EVENT_RNID(CMN600, rdb_ord, 0x14), NULL }; @@ -1386,15 +1400,14 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) int i, j; size_t sz; - cfg_region = cmn->base + rgn_offset; - reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); - cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); - dev_dbg(cmn->dev, "periph_id_2 revision: %d\n", cmn->rev); - arm_cmn_init_node_info(cmn, rgn_offset, &cfg); if (cfg.type != CMN_TYPE_CFG) return -ENODEV; + cfg_region = cmn->base + rgn_offset; + reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); + cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); + reg = readq_relaxed(cfg_region + CMN_CHILD_INFO); child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); @@ -1507,13 +1520,14 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->mesh_x = cmn->num_xps; cmn->mesh_y = cmn->num_xps / cmn->mesh_x; + dev_dbg(cmn->dev, "model %d, periph_id_2 revision %d\n", cmn->model, cmn->rev); dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n", cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn)); return 0; } -static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn) +static int arm_cmn600_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn) { struct resource *cfg, *root; @@ -1540,21 +1554,11 @@ static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn) return root->start - cfg->start; } -static int arm_cmn_of_probe(struct platform_device *pdev, struct arm_cmn *cmn) +static int arm_cmn600_of_probe(struct device_node *np) { - struct device_node *np = pdev->dev.of_node; u32 rootnode; - int ret; - cmn->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(cmn->base)) - return PTR_ERR(cmn->base); - - ret = of_property_read_u32(np, "arm,root-node", &rootnode); - if (ret) - return ret; - - return rootnode; + return of_property_read_u32(np, "arm,root-node", &rootnode) ?: rootnode; } static int arm_cmn_probe(struct platform_device *pdev) @@ -1569,12 +1573,19 @@ static int arm_cmn_probe(struct platform_device *pdev) return -ENOMEM; cmn->dev = &pdev->dev; + cmn->model = (unsigned long)device_get_match_data(cmn->dev); platform_set_drvdata(pdev, cmn); - if (has_acpi_companion(cmn->dev)) - rootnode = arm_cmn_acpi_probe(pdev, cmn); - else - rootnode = arm_cmn_of_probe(pdev, cmn); + if (cmn->model == CMN600 && has_acpi_companion(cmn->dev)) { + rootnode = arm_cmn600_acpi_probe(pdev, cmn); + } else { + rootnode = 0; + cmn->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(cmn->base)) + return PTR_ERR(cmn->base); + if (cmn->model == CMN600) + rootnode = arm_cmn600_of_probe(pdev->dev.of_node); + } if (rootnode < 0) return rootnode; @@ -1637,7 +1648,7 @@ static int arm_cmn_remove(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id arm_cmn_of_match[] = { - { .compatible = "arm,cmn-600", }, + { .compatible = "arm,cmn-600", .data = (void *)CMN600 }, {} }; MODULE_DEVICE_TABLE(of, arm_cmn_of_match); @@ -1645,7 +1656,7 @@ MODULE_DEVICE_TABLE(of, arm_cmn_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id arm_cmn_acpi_match[] = { - { "ARMHC600", }, + { "ARMHC600", CMN600 }, {} }; MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); From 60d1504070c22c059a1e11bc3fd444953da988c1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:45:00 +0000 Subject: [PATCH 11/14] perf/arm-cmn: Support new IP features The second generation of CMN IPs add new node types and significantly expand the configuration space with options for extra device ports on edge XPs, either plumbed into the regular DTM or with extra dedicated DTMs to monitor them, plus larger (and smaller) mesh sizes. Add basic support for pulling this new information out of the hardware, piping it around as necessary, and handling (most of) the new choices. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e58b495bcc7deec3882be4bac910ed0bf6979674.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 222 ++++++++++++++++++++++++++++++++--------- 1 file changed, 173 insertions(+), 49 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index ce94f923a607..0a3f33a83c01 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -24,7 +24,10 @@ #define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) #define CMN_NODEID_DEVID(reg) ((reg) & 3) +#define CMN_NODEID_EXT_DEVID(reg) ((reg) & 1) #define CMN_NODEID_PID(reg) (((reg) >> 2) & 1) +#define CMN_NODEID_EXT_PID(reg) (((reg) >> 1) & 3) +#define CMN_NODEID_1x1_PID(reg) (((reg) >> 2) & 7) #define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits))) #define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1)) @@ -37,13 +40,26 @@ #define CMN_MAX_DIMENSION 8 #define CMN_MAX_XPS (CMN_MAX_DIMENSION * CMN_MAX_DIMENSION) -#define CMN_MAX_DTMS CMN_MAX_XPS +#define CMN_MAX_DTMS (CMN_MAX_XPS + (CMN_MAX_DIMENSION - 1) * 4) -/* The CFG node has one other useful purpose */ +/* The CFG node has various info besides the discovery tree */ #define CMN_CFGM_PERIPH_ID_2 0x0010 #define CMN_CFGM_PID2_REVISION GENMASK(7, 4) -/* PMU registers occupy the 3rd 4KB page of each node's 16KB space */ +#define CMN_CFGM_INFO_GLOBAL 0x900 +#define CMN_INFO_MULTIPLE_DTM_EN BIT_ULL(63) +#define CMN_INFO_RSP_VC_NUM GENMASK_ULL(53, 52) +#define CMN_INFO_DAT_VC_NUM GENMASK_ULL(51, 50) + +/* XPs also have some local topology info which has uses too */ +#define CMN_MXP__CONNECT_INFO_P0 0x0008 +#define CMN_MXP__CONNECT_INFO_P1 0x0010 +#define CMN_MXP__CONNECT_INFO_P2 0x0028 +#define CMN_MXP__CONNECT_INFO_P3 0x0030 +#define CMN_MXP__CONNECT_INFO_P4 0x0038 +#define CMN_MXP__CONNECT_INFO_P5 0x0040 + +/* PMU registers occupy the 3rd 4KB page of each node's region */ #define CMN_PMU_OFFSET 0x2000 /* For most nodes, this is all there is */ @@ -53,6 +69,7 @@ /* DTMs live in the PMU space of XP registers */ #define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18) #define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00) +#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18,17) #define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6) #define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5) #define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4) @@ -77,7 +94,11 @@ #define CMN_DTM_PMEVCNTSR 0x240 +#define CMN_DTM_UNIT_INFO 0x0910 + #define CMN_DTM_NUM_COUNTERS 4 +/* Want more local counters? Why not replicate the whole DTM! Ugh... */ +#define CMN_DTM_OFFSET(n) ((n) * 0x200) /* The DTC node is where the magic happens */ #define CMN_DT_DTC_CTL 0x0a00 @@ -131,10 +152,10 @@ #define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) #define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) -#define CMN_CONFIG_WP_DEV_SEL BIT_ULL(48) -#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(50, 49) -#define CMN_CONFIG_WP_GRP BIT_ULL(52) -#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(53) +#define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) +#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) +#define CMN_CONFIG_WP_GRP BIT_ULL(56) +#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57) #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0) #define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0) @@ -176,9 +197,12 @@ enum cmn_node_type { CMN_TYPE_HNF, CMN_TYPE_XP, CMN_TYPE_SBSX, - CMN_TYPE_RNI = 0xa, + CMN_TYPE_MPAM_S, + CMN_TYPE_MPAM_NS, + CMN_TYPE_RNI, CMN_TYPE_RND = 0xd, CMN_TYPE_RNSAM = 0xf, + CMN_TYPE_MTSX, CMN_TYPE_CXRA = 0x100, CMN_TYPE_CXHA = 0x101, CMN_TYPE_CXLA = 0x102, @@ -233,6 +257,7 @@ struct arm_cmn_dtc { struct arm_cmn { struct device *dev; void __iomem *base; + unsigned int state; enum cmn_revision rev; enum cmn_model model; @@ -240,6 +265,13 @@ struct arm_cmn { u8 mesh_y; u16 num_xps; u16 num_dns; + bool multi_dtm; + u8 ports_used; + struct { + unsigned int rsp_vc_num : 2; + unsigned int dat_vc_num : 2; + }; + struct arm_cmn_node *xps; struct arm_cmn_node *dns; @@ -250,7 +282,6 @@ struct arm_cmn { int cpu; struct hlist_node cpuhp_node; - unsigned int state; struct pmu pmu; }; @@ -275,13 +306,25 @@ static int arm_cmn_xyidbits(const struct arm_cmn *cmn) static struct arm_cmn_nodeid arm_cmn_nid(const struct arm_cmn *cmn, u16 id) { struct arm_cmn_nodeid nid; - int bits = arm_cmn_xyidbits(cmn); - nid.x = CMN_NODEID_X(id, bits); - nid.y = CMN_NODEID_Y(id, bits); - nid.port = CMN_NODEID_PID(id); - nid.dev = CMN_NODEID_DEVID(id); + if (cmn->num_xps == 1) { + nid.x = 0; + nid.y = 0; + nid.port = CMN_NODEID_1x1_PID(id); + nid.dev = CMN_NODEID_DEVID(id); + } else { + int bits = arm_cmn_xyidbits(cmn); + nid.x = CMN_NODEID_X(id, bits); + nid.y = CMN_NODEID_Y(id, bits); + if (cmn->ports_used & 0xc) { + nid.port = CMN_NODEID_EXT_PID(id); + nid.dev = CMN_NODEID_EXT_DEVID(id); + } else { + nid.port = CMN_NODEID_PID(id); + nid.dev = CMN_NODEID_DEVID(id); + } + } return nid; } @@ -310,6 +353,7 @@ struct arm_cmn_hw_event { unsigned int dtc_idx; u8 dtcs_used; u8 num_dns; + u8 dtm_offset; }; #define for_each_hw_dn(hw, dn, i) \ @@ -354,7 +398,8 @@ struct arm_cmn_format_attr { .occupid = _occupid, \ }})[0].attr.attr) -static bool arm_cmn_is_occup_event(enum cmn_node_type type, unsigned int id) +static bool arm_cmn_is_occup_event(enum cmn_model model, + enum cmn_node_type type, unsigned int id) { return (type == CMN_TYPE_DVM && id == 0x05) || (type == CMN_TYPE_HNF && id == 0x0f); @@ -375,7 +420,7 @@ static ssize_t arm_cmn_event_show(struct device *dev, "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n", eattr->type, eattr->eventid); - if (arm_cmn_is_occup_event(eattr->type, eattr->eventid)) + if (arm_cmn_is_occup_event(eattr->model, eattr->type, eattr->eventid)) return sysfs_emit(buf, "type=0x%x,eventid=0x%x,occupid=0x%x\n", eattr->type, eattr->eventid, eattr->occupid); @@ -390,25 +435,36 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); struct arm_cmn_event_attr *eattr; - enum cmn_node_type type; eattr = container_of(attr, typeof(*eattr), attr.attr); - type = eattr->type; if (!(eattr->model & cmn->model)) return 0; - /* Watchpoints aren't nodes */ - if (type == CMN_TYPE_WP) - type = CMN_TYPE_XP; + /* Watchpoints aren't nodes, so avoid confusion */ + if (eattr->type == CMN_TYPE_WP) + return attr->mode; - /* Revision-specific differences */ - if (cmn->model == CMN600 && cmn->rev < CMN600_R1P2) { - if (type == CMN_TYPE_HNF && eattr->eventid == 0x1b) + /* Hide XP events for unused interfaces/channels */ + if (eattr->type == CMN_TYPE_XP) { + unsigned int intf = (eattr->eventid >> 2) & 7; + unsigned int chan = eattr->eventid >> 5; + + if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3))) + return 0; + + if ((chan == 5 && cmn->rsp_vc_num < 2) || + (chan == 6 && cmn->dat_vc_num < 2)) return 0; } - if (!arm_cmn_node(cmn, type)) + /* Revision-specific differences */ + if (cmn->model == CMN600 && cmn->rev < CMN600_R1P2) { + if (eattr->type == CMN_TYPE_HNF && eattr->eventid == 0x1b) + return 0; + } + + if (!arm_cmn_node(cmn, eattr->type)) return 0; return attr->mode; @@ -669,7 +725,8 @@ static u32 arm_cmn_wp_config(struct perf_event *event) config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | - FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc); + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) | + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1); if (combine && !grp) config |= CMN_DTM_WPn_CONFIG_WP_COMBINE; @@ -712,7 +769,7 @@ static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw, offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT; for_each_hw_dn(hw, dn, i) { if (dtm != &cmn->dtms[dn->dtm]) { - dtm = &cmn->dtms[dn->dtm]; + dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset; reg = readq_relaxed(dtm->base + offset); } dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); @@ -800,8 +857,10 @@ static void arm_cmn_event_start(struct perf_event *event, int flags) u64 mask = CMN_EVENT_WP_MASK(event); for_each_hw_dn(hw, dn, i) { - writeq_relaxed(val, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); - writeq_relaxed(mask, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); + void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset); + + writeq_relaxed(val, base + CMN_DTM_WPn_VAL(wp_idx)); + writeq_relaxed(mask, base + CMN_DTM_WPn_MASK(wp_idx)); } } else for_each_hw_dn(hw, dn, i) { int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); @@ -826,8 +885,10 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags) int wp_idx = arm_cmn_wp_idx(event); for_each_hw_dn(hw, dn, i) { - writeq_relaxed(0, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); - writeq_relaxed(~0ULL, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); + void __iomem *base = dn->pmu_base + CMN_DTM_OFFSET(hw->dtm_offset); + + writeq_relaxed(0, base + CMN_DTM_WPn_MASK(wp_idx)); + writeq_relaxed(~0ULL, base + CMN_DTM_WPn_VAL(wp_idx)); } } else for_each_hw_dn(hw, dn, i) { int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); @@ -847,7 +908,8 @@ struct arm_cmn_val { bool cycles; }; -static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *event) +static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val, + struct perf_event *event) { struct arm_cmn_hw_event *hw = to_cmn_hw(event); struct arm_cmn_node *dn; @@ -865,7 +927,7 @@ static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *ev } val->dtc_count++; - if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event))) occupid = CMN_EVENT_OCCUPID(event) + 1; else occupid = 0; @@ -884,7 +946,7 @@ static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *ev } } -static int arm_cmn_validate_group(struct perf_event *event) +static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) { struct arm_cmn_hw_event *hw = to_cmn_hw(event); struct arm_cmn_node *dn; @@ -904,9 +966,9 @@ static int arm_cmn_validate_group(struct perf_event *event) if (!val) return -ENOMEM; - arm_cmn_val_add_event(val, leader); + arm_cmn_val_add_event(cmn, val, leader); for_each_sibling_event(sibling, leader) - arm_cmn_val_add_event(val, sibling); + arm_cmn_val_add_event(cmn, val, sibling); type = CMN_EVENT_TYPE(event); if (type == CMN_TYPE_DTC) { @@ -917,7 +979,7 @@ static int arm_cmn_validate_group(struct perf_event *event) if (val->dtc_count == CMN_DT_NUM_COUNTERS) goto done; - if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event))) occupid = CMN_EVENT_OCCUPID(event) + 1; else occupid = 0; @@ -980,6 +1042,9 @@ static int arm_cmn_event_init(struct perf_event *event) eventid = CMN_EVENT_EVENTID(event); if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN) return -EINVAL; + /* ...but the DTM may depend on which port we're watching */ + if (cmn->multi_dtm) + hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2; } bynodeid = CMN_EVENT_BYNODEID(event); @@ -1007,7 +1072,7 @@ static int arm_cmn_event_init(struct perf_event *event) return -EINVAL; } - return arm_cmn_validate_group(event); + return arm_cmn_validate_group(cmn, event); } static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, @@ -1017,13 +1082,13 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, enum cmn_node_type type = CMN_EVENT_TYPE(event); while (i--) { - struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm]; + struct arm_cmn_dtm *dtm = &cmn->dtms[hw->dn[i].dtm] + hw->dtm_offset; unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); if (type == CMN_TYPE_WP) dtm->wp_event[arm_cmn_wp_idx(event)] = -1; - if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event))) hw->dn[i].occupid_count--; dtm->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx); @@ -1069,7 +1134,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) /* ...then the local counters to feed it. */ for_each_hw_dn(hw, dn, i) { - struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm]; + struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset; unsigned int dtm_idx, shift; u64 reg; @@ -1098,10 +1163,13 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) } else { struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); + if (cmn->multi_dtm) + nid.port %= 2; + input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx + (nid.port << 4) + (nid.dev << 2); - if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) { + if (arm_cmn_is_occup_event(cmn->model, type, CMN_EVENT_EVENTID(event))) { u8 occupid = CMN_EVENT_OCCUPID(event); if (dn->occupid_count == 0) { @@ -1283,11 +1351,11 @@ static int arm_cmn_init_irqs(struct arm_cmn *cmn) return 0; } -static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp) +static void arm_cmn_init_dtm(struct arm_cmn_dtm *dtm, struct arm_cmn_node *xp, int idx) { int i; - dtm->base = xp->pmu_base; + dtm->base = xp->pmu_base + CMN_DTM_OFFSET(idx); dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN; for (i = 0; i < 4; i++) { dtm->wp_event[i] = -1; @@ -1345,6 +1413,8 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) xp = arm_cmn_node_to_xp(cmn, dn); dn->dtm = xp->dtm; + if (cmn->multi_dtm) + dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2; if (dn->type == CMN_TYPE_DTC) { int err; @@ -1408,6 +1478,11 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); + reg = readq_relaxed(cfg_region + CMN_CFGM_INFO_GLOBAL); + cmn->multi_dtm = reg & CMN_INFO_MULTIPLE_DTM_EN; + cmn->rsp_vc_num = FIELD_GET(CMN_INFO_RSP_VC_NUM, reg); + cmn->dat_vc_num = FIELD_GET(CMN_INFO_DAT_VC_NUM, reg); + reg = readq_relaxed(cfg_region + CMN_CHILD_INFO); child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); @@ -1429,7 +1504,11 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (!dn) return -ENOMEM; - dtm = devm_kcalloc(cmn->dev, cmn->num_xps, sizeof(*dtm), GFP_KERNEL); + /* Initial safe upper bound on DTMs for any possible mesh layout */ + i = cmn->num_xps; + if (cmn->multi_dtm) + i += cmn->num_xps + 1; + dtm = devm_kcalloc(cmn->dev, i, sizeof(*dtm), GFP_KERNEL); if (!dtm) return -ENOMEM; @@ -1439,6 +1518,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) for (i = 0; i < cmn->num_xps; i++) { void __iomem *xp_region = cmn->base + xp_offset[i]; struct arm_cmn_node *xp = dn++; + unsigned int xp_ports = 0; arm_cmn_init_node_info(cmn, xp_offset[i], xp); /* @@ -1450,9 +1530,39 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (xp->id == (1 << 3)) cmn->mesh_x = xp->logid; - xp->dtc = 0xf; + if (cmn->model == CMN600) + xp->dtc = 0xf; + else + xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO); + xp->dtm = dtm - cmn->dtms; - arm_cmn_init_dtm(dtm++, xp); + arm_cmn_init_dtm(dtm++, xp, 0); + /* + * Keeping track of connected ports will let us filter out + * unnecessary XP events easily. We can also reliably infer the + * "extra device ports" configuration for the node ID format + * from this, since in that case we will see at least one XP + * with port 2 connected, for the HN-D. + */ + if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P0)) + xp_ports |= BIT(0); + if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P1)) + xp_ports |= BIT(1); + if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P2)) + xp_ports |= BIT(2); + if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P3)) + xp_ports |= BIT(3); + if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P4)) + xp_ports |= BIT(4); + if (readq_relaxed(xp_region + CMN_MXP__CONNECT_INFO_P5)) + xp_ports |= BIT(5); + + if (cmn->multi_dtm && (xp_ports & 0xc)) + arm_cmn_init_dtm(dtm++, xp, 1); + if (cmn->multi_dtm && (xp_ports & 0x30)) + arm_cmn_init_dtm(dtm++, xp, 2); + + cmn->ports_used |= xp_ports; reg = readq_relaxed(xp_region + CMN_CHILD_INFO); child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); @@ -1488,11 +1598,14 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_SBSX: case CMN_TYPE_RNI: case CMN_TYPE_RND: + case CMN_TYPE_MTSX: case CMN_TYPE_CXRA: case CMN_TYPE_CXHA: dn++; break; /* Nothing to see here */ + case CMN_TYPE_MPAM_S: + case CMN_TYPE_MPAM_NS: case CMN_TYPE_RNSAM: case CMN_TYPE_CXLA: break; @@ -1512,6 +1625,11 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (dn) cmn->dns = dn; + sz = (void *)dtm - (void *)cmn->dtms; + dtm = devm_krealloc(cmn->dev, cmn->dtms, sz, GFP_KERNEL); + if (dtm) + cmn->dtms = dtm; + /* * If mesh_x wasn't set during discovery then we never saw * an XP at (0,1), thus we must have an Nx1 configuration. @@ -1520,9 +1638,15 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->mesh_x = cmn->num_xps; cmn->mesh_y = cmn->num_xps / cmn->mesh_x; + /* 1x1 config plays havoc with XP event encodings */ + if (cmn->num_xps == 1) + dev_warn(cmn->dev, "1x1 config not fully supported, translate XP events manually\n"); + dev_dbg(cmn->dev, "model %d, periph_id_2 revision %d\n", cmn->model, cmn->rev); - dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n", - cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn)); + reg = cmn->ports_used; + dev_dbg(cmn->dev, "mesh %dx%d, ID width %d, ports %6pbl%s\n", + cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn), ®, + cmn->multi_dtm ? ", multi-DTM" : ""); return 0; } From e310644724e195b79cf8114ca5297e4cdb36c955 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:45:01 +0000 Subject: [PATCH 12/14] dt-bindings: perf: arm-cmn: Add CI-700 CI-700 is a new client-level coherent interconnect derived from the enterprise-level CMN family, and shares the same PMU design. CC: devicetree@vger.kernel.org Signed-off-by: Robin Murphy Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/5f0b372f808f1468e6d9500cedafbecd10254674.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- .../devicetree/bindings/perf/arm,cmn.yaml | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/perf/arm,cmn.yaml b/Documentation/devicetree/bindings/perf/arm,cmn.yaml index 42424ccbdd0c..2d4219ec7eda 100644 --- a/Documentation/devicetree/bindings/perf/arm,cmn.yaml +++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml @@ -12,12 +12,14 @@ maintainers: properties: compatible: - const: arm,cmn-600 + enum: + - arm,cmn-600 + - arm,ci-700 reg: items: - description: Physical address of the base (PERIPHBASE) and - size (up to 64MB) of the configuration address space. + size of the configuration address space. interrupts: minItems: 1 @@ -31,14 +33,23 @@ properties: arm,root-node: $ref: /schemas/types.yaml#/definitions/uint32 - description: Offset from PERIPHBASE of the configuration - discovery node (see TRM definition of ROOTNODEBASE). + description: Offset from PERIPHBASE of CMN-600's configuration + discovery node (see TRM definition of ROOTNODEBASE). Not + relevant for newer CMN/CI products. required: - compatible - reg - interrupts - - arm,root-node + +if: + properties: + compatible: + contains: + const: arm,cmn-600 +then: + required: + - arm,root-node additionalProperties: false From b2fea780c9282dbaf77ef081e6d97a3f2c0dfc6a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:45:02 +0000 Subject: [PATCH 13/14] perf/arm-cmn: Add CI-700 Support Add the identifiers and events for the CI-700 coherent interconnect. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/28f566ab23a83733c6c9ef9414c010b760b4549c.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 57 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 0a3f33a83c01..28ab87a6cde4 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -175,6 +175,7 @@ enum cmn_model { CMN_ANY = -1, CMN600 = 1, + CI700 = 2, }; /* CMN-600 r0px shouldn't exist in silicon, thankfully */ @@ -186,6 +187,9 @@ enum cmn_revision { CMN600_R2P0, CMN600_R3P0, CMN600_R3P1, + CI700_R0P0 = 0, + CI700_R1P0, + CI700_R2P0, }; enum cmn_node_type { @@ -401,8 +405,10 @@ struct arm_cmn_format_attr { static bool arm_cmn_is_occup_event(enum cmn_model model, enum cmn_node_type type, unsigned int id) { - return (type == CMN_TYPE_DVM && id == 0x05) || - (type == CMN_TYPE_HNF && id == 0x0f); + if (type == CMN_TYPE_DVM) + return (model == CMN600 && id == 0x05) || + (model == CI700 && id == 0x0c); + return type == CMN_TYPE_HNF && id == 0x0f; } static ssize_t arm_cmn_event_show(struct device *dev, @@ -497,14 +503,19 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, __CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \ __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \ __CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \ - __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)) + __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)), \ + __CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)), \ + __CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2)) /* Good thing there are only 3 fundamental XP events... */ #define CMN_EVENT_XP(_name, _event) \ _CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \ _CMN_EVENT_XP(rsp_##_name, (_event) | (1 << 5)), \ _CMN_EVENT_XP(snp_##_name, (_event) | (2 << 5)), \ - _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)) + _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)), \ + _CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)), \ + _CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)), \ + _CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5)) static struct attribute *arm_cmn_event_attrs[] = { @@ -522,6 +533,20 @@ static struct attribute *arm_cmn_event_attrs[] = { _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_all, 0x05, 0), _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmop, 0x05, 1), _CMN_EVENT_DVM(CMN600, rxreq_trk_occupancy_dvmsync, 0x05, 2), + CMN_EVENT_DVM(CI700, dvmop_tlbi, 0x01), + CMN_EVENT_DVM(CI700, dvmop_bpi, 0x02), + CMN_EVENT_DVM(CI700, dvmop_pici, 0x03), + CMN_EVENT_DVM(CI700, dvmop_vici, 0x04), + CMN_EVENT_DVM(CI700, dvmsync, 0x05), + CMN_EVENT_DVM(CI700, vmid_filtered, 0x06), + CMN_EVENT_DVM(CI700, rndop_filtered, 0x07), + CMN_EVENT_DVM(CI700, retry, 0x08), + CMN_EVENT_DVM(CI700, txsnp_flitv, 0x09), + CMN_EVENT_DVM(CI700, txsnp_stall, 0x0a), + CMN_EVENT_DVM(CI700, trkfull, 0x0b), + _CMN_EVENT_DVM(CI700, trk_occupancy_all, 0x0c, 0), + _CMN_EVENT_DVM(CI700, trk_occupancy_dvmop, 0x0c, 1), + _CMN_EVENT_DVM(CI700, trk_occupancy_dvmsync, 0x0c, 2), CMN_EVENT_HNF(CMN_ANY, cache_miss, 0x01), CMN_EVENT_HNF(CMN_ANY, slc_sf_cache_access, 0x02), @@ -558,6 +583,9 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_HNF(CMN_ANY, stash_snp_sent, 0x1d), CMN_EVENT_HNF(CMN_ANY, stash_data_pull, 0x1e), CMN_EVENT_HNF(CMN_ANY, snp_fwded, 0x1f), + CMN_EVENT_HNF(CI700, atomic_fwd, 0x20), + CMN_EVENT_HNF(CI700, mpam_hardlim, 0x21), + CMN_EVENT_HNF(CI700, mpam_softlim, 0x22), CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20), CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21), @@ -598,6 +626,7 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_SBSX(CMN_ANY, wdb_occ_cnt_ovfl, 0x14), CMN_EVENT_SBSX(CMN_ANY, rd_axi_trkr_occ_cnt_ovfl, 0x15), CMN_EVENT_SBSX(CMN_ANY, cmo_axi_trkr_occ_cnt_ovfl, 0x16), + CMN_EVENT_SBSX(CI700, rdb_occ_cnt_ovfl, 0x17), CMN_EVENT_SBSX(CMN_ANY, arvalid_no_arready, 0x21), CMN_EVENT_SBSX(CMN_ANY, awvalid_no_awready, 0x22), CMN_EVENT_SBSX(CMN_ANY, wvalid_no_wready, 0x23), @@ -624,6 +653,25 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_RNID(CMN600, rdb_replay, 0x12), CMN_EVENT_RNID(CMN600, rdb_hybrid, 0x13), CMN_EVENT_RNID(CMN600, rdb_ord, 0x14), + CMN_EVENT_RNID(CI700, padb_occ_ovfl, 0x11), + CMN_EVENT_RNID(CI700, rpdb_occ_ovfl, 0x12), + CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice1, 0x13), + CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice2, 0x14), + CMN_EVENT_RNID(CI700, rrt_occup_ovfl_slice3, 0x15), + CMN_EVENT_RNID(CI700, wrt_throttled, 0x16), + + CMN_EVENT_MTSX(tc_lookup, 0x01), + CMN_EVENT_MTSX(tc_fill, 0x02), + CMN_EVENT_MTSX(tc_miss, 0x03), + CMN_EVENT_MTSX(tdb_forward, 0x04), + CMN_EVENT_MTSX(tcq_hazard, 0x05), + CMN_EVENT_MTSX(tcq_rd_alloc, 0x06), + CMN_EVENT_MTSX(tcq_wr_alloc, 0x07), + CMN_EVENT_MTSX(tcq_cmo_alloc, 0x08), + CMN_EVENT_MTSX(axi_rd_req, 0x09), + CMN_EVENT_MTSX(axi_wr_req, 0x0a), + CMN_EVENT_MTSX(tcq_occ_cnt_ovfl, 0x0b), + CMN_EVENT_MTSX(tdb_occ_cnt_ovfl, 0x0c), NULL }; @@ -1773,6 +1821,7 @@ static int arm_cmn_remove(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id arm_cmn_of_match[] = { { .compatible = "arm,cmn-600", .data = (void *)CMN600 }, + { .compatible = "arm,ci-700", .data = (void *)CI700 }, {} }; MODULE_DEVICE_TABLE(of, arm_cmn_of_match); From a88fa6c28b867a387e3af202d6dbbb754d3aa2f1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 3 Dec 2021 11:45:03 +0000 Subject: [PATCH 14/14] perf/arm-cmn: Add debugfs topology info In general, detailed performance analysis will require knoweldge of the the SoC beyond the CMN itself - e.g. which actual CPUs/peripherals/etc. are connected to each node. However for certain development and bringup tasks it can be useful to have a quick overview of the CMN internal topology to hand too. Add a debugfs file to map this out. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/159fd4d7e19fb3c8801a8cb64ee73ec50f55903c.1638530442.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 151 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 28ab87a6cde4..0e48adce57ef 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -287,6 +288,7 @@ struct arm_cmn { struct hlist_node cpuhp_node; struct pmu pmu; + struct dentry *debug; }; #define to_cmn(p) container_of(p, struct arm_cmn, pmu) @@ -351,6 +353,140 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } +struct dentry *arm_cmn_debugfs; + +#ifdef CONFIG_DEBUG_FS +static const char *arm_cmn_device_type(u8 type) +{ + switch(type) { + case 0x01: return " RN-I |"; + case 0x02: return " RN-D |"; + case 0x04: return " RN-F_B |"; + case 0x05: return "RN-F_B_E|"; + case 0x06: return " RN-F_A |"; + case 0x07: return "RN-F_A_E|"; + case 0x08: return " HN-T |"; + case 0x09: return " HN-I |"; + case 0x0a: return " HN-D |"; + case 0x0c: return " SN-F |"; + case 0x0d: return " SBSX |"; + case 0x0e: return " HN-F |"; + case 0x0f: return " SN-F_E |"; + case 0x10: return " SN-F_D |"; + case 0x11: return " CXHA |"; + case 0x12: return " CXRA |"; + case 0x13: return " CXRH |"; + case 0x14: return " RN-F_D |"; + case 0x15: return "RN-F_D_E|"; + case 0x16: return " RN-F_C |"; + case 0x17: return "RN-F_C_E|"; + case 0x1c: return " MTSX |"; + default: return " |"; + } +} + +static void arm_cmn_show_logid(struct seq_file *s, int x, int y, int p, int d) +{ + struct arm_cmn *cmn = s->private; + struct arm_cmn_node *dn; + + for (dn = cmn->dns; dn->type; dn++) { + struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); + + if (dn->type == CMN_TYPE_XP) + continue; + /* Ignore the extra components that will overlap on some ports */ + if (dn->type < CMN_TYPE_HNI) + continue; + + if (nid.x != x || nid.y != y || nid.port != p || nid.dev != d) + continue; + + seq_printf(s, " #%-2d |", dn->logid); + return; + } + seq_puts(s, " |"); +} + +static int arm_cmn_map_show(struct seq_file *s, void *data) +{ + struct arm_cmn *cmn = s->private; + int x, y, p, pmax = fls(cmn->ports_used); + + seq_puts(s, " X"); + for (x = 0; x < cmn->mesh_x; x++) + seq_printf(s, " %d ", x); + seq_puts(s, "\nY P D+"); + y = cmn->mesh_y; + while (y--) { + int xp_base = cmn->mesh_x * y; + u8 port[6][CMN_MAX_DIMENSION]; + + for (x = 0; x < cmn->mesh_x; x++) + seq_puts(s, "--------+"); + + seq_printf(s, "\n%d |", y); + for (x = 0; x < cmn->mesh_x; x++) { + struct arm_cmn_node *xp = cmn->xps + xp_base + x; + void __iomem *base = xp->pmu_base - CMN_PMU_OFFSET; + + port[0][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P0); + port[1][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P1); + port[2][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P2); + port[3][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P3); + port[4][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P4); + port[5][x] = readl_relaxed(base + CMN_MXP__CONNECT_INFO_P5); + seq_printf(s, " XP #%-2d |", xp_base + x); + } + + seq_puts(s, "\n |"); + for (x = 0; x < cmn->mesh_x; x++) { + u8 dtc = cmn->xps[xp_base + x].dtc; + + if (dtc & (dtc - 1)) + seq_puts(s, " DTC ?? |"); + else + seq_printf(s, " DTC %ld |", __ffs(dtc)); + } + seq_puts(s, "\n |"); + for (x = 0; x < cmn->mesh_x; x++) + seq_puts(s, "........|"); + + for (p = 0; p < pmax; p++) { + seq_printf(s, "\n %d |", p); + for (x = 0; x < cmn->mesh_x; x++) + seq_puts(s, arm_cmn_device_type(port[p][x])); + seq_puts(s, "\n 0|"); + for (x = 0; x < cmn->mesh_x; x++) + arm_cmn_show_logid(s, x, y, p, 0); + seq_puts(s, "\n 1|"); + for (x = 0; x < cmn->mesh_x; x++) + arm_cmn_show_logid(s, x, y, p, 1); + } + seq_puts(s, "\n-----+"); + } + for (x = 0; x < cmn->mesh_x; x++) + seq_puts(s, "--------+"); + seq_puts(s, "\n"); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(arm_cmn_map); + +static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) +{ + const char *name = "map"; + + if (id > 0) + name = devm_kasprintf(cmn->dev, GFP_KERNEL, "map_%d", id); + if (!name) + return; + + cmn->debug = debugfs_create_file(name, 0444, arm_cmn_debugfs, cmn, &arm_cmn_map_fops); +} +#else +static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {} +#endif + struct arm_cmn_hw_event { struct arm_cmn_node *dn; u64 dtm_idx[2]; @@ -1738,7 +1874,7 @@ static int arm_cmn_probe(struct platform_device *pdev) struct arm_cmn *cmn; const char *name; static atomic_t id; - int err, rootnode; + int err, rootnode, this_id; cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); if (!cmn) @@ -1792,7 +1928,8 @@ static int arm_cmn_probe(struct platform_device *pdev) .cancel_txn = arm_cmn_end_txn, }; - name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", atomic_fetch_inc(&id)); + this_id = atomic_fetch_inc(&id); + name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id); if (!name) return -ENOMEM; @@ -1803,6 +1940,8 @@ static int arm_cmn_probe(struct platform_device *pdev) err = perf_pmu_register(&cmn->pmu, name, -1); if (err) cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node); + else + arm_cmn_debugfs_init(cmn, this_id); return err; } @@ -1815,6 +1954,7 @@ static int arm_cmn_remove(struct platform_device *pdev) perf_pmu_unregister(&cmn->pmu); cpuhp_state_remove_instance_nocalls(arm_cmn_hp_state, &cmn->cpuhp_node); + debugfs_remove(cmn->debug); return 0; } @@ -1857,9 +1997,13 @@ static int __init arm_cmn_init(void) return ret; arm_cmn_hp_state = ret; + arm_cmn_debugfs = debugfs_create_dir("arm-cmn", NULL); + ret = platform_driver_register(&arm_cmn_driver); - if (ret) + if (ret) { cpuhp_remove_multi_state(arm_cmn_hp_state); + debugfs_remove(arm_cmn_debugfs); + } return ret; } @@ -1867,6 +2011,7 @@ static void __exit arm_cmn_exit(void) { platform_driver_unregister(&arm_cmn_driver); cpuhp_remove_multi_state(arm_cmn_hp_state); + debugfs_remove(arm_cmn_debugfs); } module_init(arm_cmn_init);