From 78cdda7ea07d90989b27b5d5f49096f5e197dedd Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Jun 2016 11:17:12 -0600 Subject: [PATCH 01/23] coresight: Fix NULL pointer dereference in _coresight_build_path _coresight_build_path assumes that all the connections of a csdev has the child_dev initialised. This may not be true if the particular component is not supported by the kernel config(e.g TPIU) but is present in the DT. In which case, building a path can cause a crash like this : Unable to handle kernel NULL pointer dereference at virtual address 00000010 pgd = ffffffc9750dd000 [00000010] *pgd=00000009f5e90003, *pud=00000009f5e90003, *pmd=0000000000000000 Internal error: Oops: 96000006 [#1] PREEMPT SMP Modules linked in: CPU: 4 PID: 1348 Comm: bash Not tainted 4.6.0-next-20160517 #1646 Hardware name: ARM Juno development board (r0) (DT) task: ffffffc97517a280 ti: ffffffc9762c4000 task.ti: ffffffc9762c4000 PC is at _coresight_build_path+0x18/0xe4 LR is at _coresight_build_path+0xc0/0xe4 pc : [] lr : [] pstate: 20000145 sp : ffffffc9762c7ba0 [] _coresight_build_path+0x18/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] _coresight_build_path+0xc0/0xe4 [] coresight_build_path+0x40/0x68 [] coresight_enable+0x74/0x1bc [] enable_source_store+0x3c/0x6c [] dev_attr_store+0x18/0x28 [] sysfs_kf_write+0x40/0x50 [] kernfs_fop_write+0x140/0x1cc [] __vfs_write+0x28/0x110 [] vfs_write+0xa0/0x174 [] SyS_write+0x44/0xa0 [] el0_svc_naked+0x24/0x28 Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ec48a1d981fe90ecb5bcfaaf1ae2c69d842cbbbc) --- drivers/hwtracing/coresight/coresight.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 5443d03a1eec..0fdaaf4a8994 100644 --- 
a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -385,7 +385,6 @@ static int _coresight_build_path(struct coresight_device *csdev, int i; bool found = false; struct coresight_node *node; - struct coresight_connection *conn; /* An activated sink has been found. Enqueue the element */ if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || @@ -394,8 +393,9 @@ static int _coresight_build_path(struct coresight_device *csdev, /* Not a sink - recursively explore each port found on this element */ for (i = 0; i < csdev->nr_outport; i++) { - conn = &csdev->conns[i]; - if (_coresight_build_path(conn->child_dev, path) == 0) { + struct coresight_device *child_dev = csdev->conns[i].child_dev; + + if (child_dev && _coresight_build_path(child_dev, path) == 0) { found = true; break; } From bc61bcb2ce60ac41e29e615a58bcbcc9b3fc52bc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Jun 2016 11:17:13 -0600 Subject: [PATCH 02/23] coresight: Fix tmc_read_unprepare_etr At the end of the trace capture, we free the allocated memory, resetting the drvdata->buf to NULL, to indicate that trace data was collected and the next trace session should allocate the memory in tmc_enable_etr_sink_sysfs. In tmc_enable_etr_sink_sysfs, however, we only allocate memory if drvdata->vaddr is NULL, and drvdata->vaddr is not reset to NULL at the end of the previous session. 
This can cause, drvdata->vaddr getting assigned NULL and later we do memset() which causes a crash as below : Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = ffffffc9747f0000 [00000000] *pgd=00000009f402e003, *pud=00000009f402e003, *pmd=0000000000000000 Internal error: Oops: 96000046 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 1592 Comm: bash Not tainted 4.7.0-rc1+ #1712 Hardware name: ARM Juno development board (r0) (DT) task: ffffffc078fe0080 ti: ffffffc974178000 task.ti: ffffffc974178000 PC is at __memset+0x1ac/0x200 LR is at tmc_enable_etr_sink+0xf8/0x304 pc : [] lr : [] pstate: 400001c5 sp : ffffffc97417bc00 x29: ffffffc97417bc00 x28: ffffffc974178000 Call trace: Exception stack(0xffffffc97417ba40 to 0xffffffc97417bb60) ba40: 0000000000000001 ffffffc974a5d098 ffffffc97417bc00 ffffff80083a002c ba60: ffffffc974a5d118 0000000000000000 0000000000000000 0000000000000000 ba80: 0000000000000001 0000000000000000 ffffff800859bdec 0000000000000040 baa0: ffffff8008b45b58 00000000000001c0 ffffffc97417baf0 ffffff80080eddb4 bac0: 0000000000000003 ffffffc078fe0080 ffffffc078fe0960 ffffffc078fe0940 bae0: 0000000000000000 0000000000000000 00000000007fffc0 0000000000000004 bb00: 0000000000000000 0000000000000040 000000000000003f 0000000000000000 bb20: 0000000000000000 0000000000000000 0000000000000000 0000000000000001 bb40: ffffffc078fe0960 0000000000000018 ffffffffffffffff 0008669628000000 [] __memset+0x1ac/0x200 [] coresight_enable_path+0xa8/0x1dc [] coresight_enable+0x88/0x1b8 [] enable_source_store+0x3c/0x6c [] dev_attr_store+0x18/0x28 [] sysfs_kf_write+0x54/0x64 [] kernfs_fop_write+0x148/0x1d8 [] __vfs_write+0x28/0x110 [] vfs_write+0xa0/0x198 [] SyS_write+0x44/0xa0 [] el0_svc_naked+0x24/0x28 Code: 91010108 54ffff4a 8b040108 cb050042 (d50b7428) This patch fixes the issue by clearing the drvdata->vaddr while we free the allocated buffer at the end of a session, so that we allocate the memory again. 
Cc: mathieu.poirier@linaro.org Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 8e215298a15d5b93c6fa22895c406da538769bca) --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 847d1b5f2c13..3369d7a80a51 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -315,7 +315,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) */ vaddr = drvdata->vaddr; paddr = drvdata->paddr; - drvdata->buf = NULL; + drvdata->buf = drvdata->vaddr = NULL; } drvdata->reading = false; From 04874216ced720abd8e05c5fa78cdfee21a6d645 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 14 Jun 2016 11:17:14 -0600 Subject: [PATCH 03/23] coresight: Fix erroneous memset in tmc_read_unprepare_etr At the end of a trace collection, we try to clear the entire buffer and enable the ETR back if it was already enabled. But, we would have adjusted the drvdata->buf to point to the beginning of the trace data in the trace buffer @drvdata->vaddr. So, the following code which clears the buffer is dangerous and can cause crashes, like below : memset(drvdata->buf, 0, drvdata->size); Unable to handle kernel paging request at virtual address ffffff800a145000 pgd = ffffffc974726000 *pgd=00000009f3e91003, *pud=00000009f3e91003, *pmd=0000000000000000 PREEMPT SMP Modules linked in: CPU: 4 PID: 1692 Comm: dd Not tainted 4.7.0-rc2+ #1721 Hardware name: ARM Juno development board (r0) (DT) task: ffffffc9734a0080 ti: ffffffc974460000 task.ti: ffffffc974460000 PC is at __memset+0x1ac/0x200 LR is at tmc_read_unprepare_etr+0x144/0x1bc pc : [] lr : [] pstate: 200001c5 ... 
[] __memset+0x1ac/0x200 [] tmc_release+0x90/0x94 [] __fput+0xa8/0x1ec [] ____fput+0xc/0x14 [] task_work_run+0xb0/0xe4 [] do_notify_resume+0x64/0x6c [] work_pending+0x10/0x14 Code: 91010108 54ffff4a 8b040108 cb050042 (d50b7428) Since we clear the buffer anyway in the following call to tmc_etr_enable_hw(), remove the erroneous memset(). Fixes: commit de5461970b3e9e1 ("coresight: tmc: allocating memory when needed") Cc: Mathieu Poirier Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f3b8172fe15fbed0d0d33d99780e122213e00684) --- drivers/hwtracing/coresight/coresight-tmc-etr.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 3369d7a80a51..688be9e060fc 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -300,13 +300,10 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { /* * The trace run will continue with the same allocated trace - * buffer. As such zero-out the buffer so that we don't end - * up with stale data. - * - * Since the tracer is still enabled drvdata::buf - * can't be NULL. + * buffer. The trace buffer is cleared in tmc_etr_enable_hw(), + * so we don't have to explicitly clear it. Also, since the + * tracer is still enabled drvdata::buf can't be NULL. */ - memset(drvdata->buf, 0, drvdata->size); tmc_etr_enable_hw(drvdata); } else { /* From 288bbf9d8dd0e6d45d54b79c2ea8946fd6832081 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 6 May 2016 15:35:50 +0100 Subject: [PATCH 04/23] coresight: Handle build path error Enabling a component via sysfs (echo 1 > enable_source), would trigger building a path from the enabled sources to the sink. 
If there is an error in the process (e.g, sink not enabled or the device (CPU corresponding to ETM) is not online), we never report failure, except for leaving a message in the dmesg. Do proper error checking for the build path and return the error. Before: $ echo 0 > /sys/devices/system/cpu/cpu2/online $ echo 1 > /sys/devices/cs_etm/cpu2/enable_source $ echo $? 0 After: $ echo 0 > /sys/devices/system/cpu/cpu2/online $ echo 1 > /sys/devices/cs_etm/cpu2/enable_source -bash: echo: write error: No such device or address Signed-off-by: Suzuki K Poulose Acked-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5014e904681ddbdf663bb20f134eb053ddccb181) --- drivers/hwtracing/coresight/coresight.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 0fdaaf4a8994..d08d1ab9bba5 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -425,6 +425,7 @@ static int _coresight_build_path(struct coresight_device *csdev, struct list_head *coresight_build_path(struct coresight_device *csdev) { struct list_head *path; + int rc; path = kzalloc(sizeof(struct list_head), GFP_KERNEL); if (!path) @@ -432,9 +433,10 @@ struct list_head *coresight_build_path(struct coresight_device *csdev) INIT_LIST_HEAD(path); - if (_coresight_build_path(csdev, path)) { + rc = _coresight_build_path(csdev, path); + if (rc) { kfree(path); - path = NULL; + return ERR_PTR(rc); } return path; @@ -507,8 +509,9 @@ int coresight_enable(struct coresight_device *csdev) goto out; path = coresight_build_path(csdev); - if (!path) { + if (IS_ERR(path)) { pr_err("building path(s) failed\n"); + ret = PTR_ERR(path); goto out; } From b2364ea00c5bd20e4f786f14fe411f62f1248ad4 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Sun, 18 Oct 2015 16:50:48 -0600 Subject: [PATCH 05/23] coresight: Add support for Juno platform This patch adds support 
for ARM's juno platform. More specifically it has definitions for the A53/57 tracers, the A53/57 cluster funnels, the main funnel and the ETF in circular buffer mode. Support for all the other coresight IP blocks is not addressed. Signed-off-by: Mathieu Poirier --- arch/arm64/boot/dts/arm/juno.dts | 305 +++++++++++++++++++++++++++++++ 1 file changed, 305 insertions(+) diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts index 53442b5ee4ff..3e1a84b01b50 100644 --- a/arch/arm64/boot/dts/arm/juno.dts +++ b/arch/arm64/boot/dts/arm/juno.dts @@ -143,5 +143,310 @@ pmu_a53 { <&A53_3>; }; + etr@20070000 { + compatible = "arm,coresight-tmc", "arm,primecell"; + reg = <0 0x20070000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + etr_in_port: endpoint { + slave-mode; + remote-endpoint = <&replicator_out_port1>; + }; + }; + }; + + tpiu@20030000 { + compatible = "arm,coresight-tpiu", "arm,primecell"; + reg = <0 0x20030000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + tpiu_in_port: endpoint { + slave-mode; + remote-endpoint = <&replicator_out_port0>; + }; + }; + }; + + replicator@20020000 { + /* non-configurable replicators don't show up on the + * AMBA bus. As such no need to add "arm,primecell". 
+ */ + compatible = "arm,coresight-replicator"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* replicator output ports */ + port@0 { + reg = <0>; + replicator_out_port0: endpoint { + remote-endpoint = <&tpiu_in_port>; + }; + }; + + port@1 { + reg = <1>; + replicator_out_port1: endpoint { + remote-endpoint = <&etr_in_port>; + }; + }; + + /* replicator input port */ + port@2 { + reg = <0>; + replicator_in_port0: endpoint { + slave-mode; + remote-endpoint = <&etf_out_port>; + }; + }; + }; + }; + + etf@20010000 { + compatible = "arm,coresight-tmc", "arm,primecell"; + reg = <0 0x20010000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* input port */ + port@0 { + reg = <0>; + etf_in_port: endpoint { + slave-mode; + remote-endpoint = + <&main_funnel_out_port>; + }; + }; + + /* output port */ + port@1 { + reg = <0>; + etf_out_port: endpoint { + remote-endpoint = + <&replicator_in_port0>; + }; + }; + }; + }; + + main_funnel@20040000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0x20040000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + main_funnel_out_port: endpoint { + remote-endpoint = + <&etf_in_port>; + }; + }; + + port@1 { + reg = <0>; + main_funnel_in_port0: endpoint { + slave-mode; + remote-endpoint = + <&A72_57_funnel_out_port>; + }; + }; + + port@2 { + reg = <1>; + main_funnel_in_port1: endpoint { + slave-mode; + remote-endpoint = <&A53_funnel_out_port>; + }; + }; + + }; + }; + + A72_57_funnel@220c0000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0x220c0000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + A72_57_funnel_out_port: endpoint { + remote-endpoint = + <&main_funnel_in_port0>; + }; + }; + + port@1 { + reg = 
<0>; + A72_57_funnel_in_port0: endpoint { + slave-mode; + remote-endpoint = + <&A72_57_etm0_out_port>; + }; + }; + + port@2 { + reg = <1>; + A72_57_funnel_in_port1: endpoint { + slave-mode; + remote-endpoint = + <&A72_57_etm1_out_port>; + }; + }; + }; + }; + + A53_funnel@230c0000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0x230c0000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + A53_funnel_out_port: endpoint { + remote-endpoint = + <&main_funnel_in_port1>; + }; + }; + + port@1 { + reg = <0>; + A53_funnel_in_port0: endpoint { + slave-mode; + remote-endpoint = <&A53_etm0_out_port>; + }; + }; + + port@2 { + reg = <1>; + A53_funnel_in_port1: endpoint { + slave-mode; + remote-endpoint = <&A53_etm1_out_port>; + }; + }; + port@3 { + reg = <2>; + A53_funnel_in_port2: endpoint { + slave-mode; + remote-endpoint = <&A53_etm2_out_port>; + }; + }; + port@4 { + reg = <3>; + A53_funnel_in_port3: endpoint { + slave-mode; + remote-endpoint = <&A53_etm3_out_port>; + }; + }; + }; + }; + + etm@22040000 { + compatible = "arm,coresight-etm4x", "arm,primecell"; + reg = <0 0x22040000 0 0x1000>; + + cpu = <&A57_0>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + A72_57_etm0_out_port: endpoint { + remote-endpoint = <&A72_57_funnel_in_port0>; + }; + }; + }; + + etm@22140000 { + compatible = "arm,coresight-etm4x", "arm,primecell"; + reg = <0 0x22140000 0 0x1000>; + + cpu = <&A57_1>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + A72_57_etm1_out_port: endpoint { + remote-endpoint = <&A72_57_funnel_in_port1>; + }; + }; + }; + + etm@23040000 { + compatible = "arm,coresight-etm4x", "arm,primecell"; + reg = <0 0x23040000 0 0x1000>; + + cpu = <&A53_0>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + A53_etm0_out_port: endpoint { + remote-endpoint = <&A53_funnel_in_port0>; + }; + }; + }; + + etm@23140000 { + 
compatible = "arm,coresight-etm4x", "arm,primecell"; + reg = <0 0x23140000 0 0x1000>; + + cpu = <&A53_1>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + A53_etm1_out_port: endpoint { + remote-endpoint = <&A53_funnel_in_port1>; + }; + }; + }; + + etm@23240000 { + compatible = "arm,coresight-etm4x", "arm,primecell"; + reg = <0 0x23240000 0 0x1000>; + + cpu = <&A53_2>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + A53_etm2_out_port: endpoint { + remote-endpoint = <&A53_funnel_in_port2>; + }; + }; + }; + + etm@23340000 { + compatible = "arm,coresight-etm4x", "arm,primecell"; + reg = <0 0x23340000 0 0x1000>; + + cpu = <&A53_3>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + port { + A53_etm3_out_port: endpoint { + remote-endpoint = <&A53_funnel_in_port3>; + }; + }; + }; + #include "juno-base.dtsi" }; From b90eb99a2c72a76c2334353638c212b7dd7d2bf1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 10 Dec 2015 11:36:15 -0700 Subject: [PATCH 06/23] coresight: tmc: implementing TMC-ETR AUX space API This patch implement the AUX area interfaces required to use the TMC (configured as an ETR) from the Perf sub-system. The ETR is configured to work with contiguous memory only. Although not optimal, it allows the IP block to be used while the scatter-gather mode of operation is being worked on. The heuristic is heavily borrowed from the ETB10 and TMC-ETF implementation. Signed-off-by: Mathieu Poirier --- .../hwtracing/coresight/coresight-tmc-etr.c | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 688be9e060fc..ba54e1942a8c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -15,11 +15,30 @@ * this program. If not, see . 
*/ +#include #include #include +#include + #include "coresight-priv.h" #include "coresight-tmc.h" +/** + * struct cs_etr_buffers - keep track of a recording session's specifics + * @tmc: generic portion of the TMC buffers + * @paddr: the physical address of a DMA'able contiguous memory area + * @vaddr: the virtual address associated to @paddr + * @size: how much memory we have, starting at @paddr + * @dev: the device @vaddr has been tied to + */ +struct cs_etr_buffers { + struct cs_buffers tmc; + dma_addr_t paddr; + void __iomem *vaddr; + u32 size; + struct device *dev; +}; + void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl; @@ -235,9 +254,233 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) dev_info(drvdata->dev, "TMC-ETR disabled\n"); } +static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, int cpu, + void **pages, int nr_pages, bool overwrite) +{ + int node; + struct cs_etr_buffers *buf; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (cpu == -1) + cpu = smp_processor_id(); + node = cpu_to_node(cpu); + + /* Allocate memory structure for interaction with Perf */ + buf = kzalloc_node(sizeof(struct cs_etr_buffers), GFP_KERNEL, node); + if (!buf) + return NULL; + + buf->dev = drvdata->dev; + buf->size = drvdata->size; + buf->vaddr = dma_alloc_coherent(buf->dev, buf->size, + &buf->paddr, GFP_KERNEL); + if (!buf->vaddr) { + kfree(buf); + return NULL; + } + + buf->tmc.snapshot = overwrite; + buf->tmc.nr_pages = nr_pages; + buf->tmc.data_pages = pages; + + return buf; +} + +static void tmc_free_etr_buffer(void *config) +{ + struct cs_etr_buffers *buf = config; + + dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->paddr); + kfree(buf); +} + +static int tmc_set_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int ret = 0; + unsigned long head; + struct cs_etr_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = 
dev_get_drvdata(csdev->dev.parent); + + /* wrap head around to the amount of space we have */ + head = handle->head & ((buf->tmc.nr_pages << PAGE_SHIFT) - 1); + + /* find the page to write to */ + buf->tmc.cur = head / PAGE_SIZE; + + /* and offset within that page */ + buf->tmc.offset = head % PAGE_SIZE; + + local_set(&buf->tmc.data_size, 0); + + /* Tell the HW where to put the trace data */ + drvdata->vaddr = buf->vaddr; + drvdata->paddr = buf->paddr; + memset(drvdata->vaddr, 0, drvdata->size); + + return ret; +} + +static unsigned long tmc_reset_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config, bool *lost) +{ + long size = 0; + struct cs_etr_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (buf) { + /* + * In snapshot mode ->data_size holds the new address of the + * ring buffer's head. The size itself is the whole address + * range since we want the latest information. + */ + if (buf->tmc.snapshot) { + size = buf->tmc.nr_pages << PAGE_SHIFT; + handle->head = local_xchg(&buf->tmc.data_size, size); + } + + /* + * Tell the tracer PMU how much we got in this run and if + * something went wrong along the way. Nobody else can use + * this cs_etr_buffers instance until we are done. As such + * resetting parameters here and squaring off with the ring + * buffer API in the tracer PMU is fine. 
+ */ + *lost = !!local_xchg(&buf->tmc.lost, 0); + size = local_xchg(&buf->tmc.data_size, 0); + } + + /* Get ready for another run */ + drvdata->vaddr = NULL; + drvdata->paddr = 0; + + return size; +} + +static void tmc_update_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *sink_config) +{ + int i, cur; + u32 *buf_ptr; + u32 read_ptr, write_ptr; + u32 status, to_read; + unsigned long offset; + struct cs_buffers *buf = sink_config; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + if (!buf) + return; + + /* This shouldn't happen */ + if (WARN_ON_ONCE(local_read(&drvdata->mode) != CS_MODE_PERF)) + return; + + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + + read_ptr = readl_relaxed(drvdata->base + TMC_RRP); + write_ptr = readl_relaxed(drvdata->base + TMC_RWP); + + /* + * Get a hold of the status register and see if a wrap around + * has occurred. If so adjust things accordingly. + */ + status = readl_relaxed(drvdata->base + TMC_STS); + if (status & TMC_STS_FULL) { + local_inc(&buf->lost); + to_read = drvdata->size; + } else { + to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size); + } + + /* + * The TMC RAM buffer may be bigger than the space available in the + * perf ring buffer (handle->size). If so advance the RRP so that we + * get the latest trace data. + */ + if (to_read > handle->size) { + u32 buffer_start, mask = 0; + + /* Read buffer start address in system memory */ + buffer_start = readl_relaxed(drvdata->base + TMC_DBALO); + + /* + * The value written to RRP must be byte-address aligned to + * the width of the trace memory databus _and_ to a frame + * boundary (16 byte), whichever is the biggest. For example, + * for 32-bit, 64-bit and 128-bit wide trace memory, the four + * LSBs must be 0s. For 256-bit wide trace memory, the five + * LSBs must be 0s. 
+ */ + switch (drvdata->memwidth) { + case TMC_MEM_INTF_WIDTH_32BITS: + case TMC_MEM_INTF_WIDTH_64BITS: + case TMC_MEM_INTF_WIDTH_128BITS: + mask = GENMASK(31, 5); + break; + case TMC_MEM_INTF_WIDTH_256BITS: + mask = GENMASK(31, 6); + break; + } + + /* + * Make sure the new size is aligned in accordance with the + * requirement explained above. + */ + to_read = handle->size & mask; + /* Move the RAM read pointer up */ + read_ptr = (write_ptr + drvdata->size) - to_read; + /* Make sure we are still within our limits */ + if (read_ptr > (buffer_start + (drvdata->size - 1))) + read_ptr -= drvdata->size; + /* Tell the HW */ + writel_relaxed(read_ptr, drvdata->base + TMC_RRP); + local_inc(&buf->lost); + } + + cur = buf->cur; + offset = buf->offset; + + /* for every byte to read */ + for (i = 0; i < to_read; i += 4) { + buf_ptr = buf->data_pages[cur] + offset; + *buf_ptr = readl_relaxed(drvdata->base + TMC_RRD); + + offset += 4; + if (offset >= PAGE_SIZE) { + offset = 0; + cur++; + /* wrap around at the end of the buffer */ + cur &= buf->nr_pages - 1; + } + } + + /* + * In snapshot mode all we have to do is communicate to + * perf_aux_output_end() the address of the current head. In full + * trace mode the same function expects a size to move rb->aux_head + * forward. 
+ */ + if (buf->snapshot) + local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); + else + local_add(to_read, &buf->data_size); + + CS_LOCK(drvdata->base); +} + static const struct coresight_ops_sink tmc_etr_sink_ops = { .enable = tmc_enable_etr_sink, .disable = tmc_disable_etr_sink, + .alloc_buffer = tmc_alloc_etr_buffer, + .free_buffer = tmc_free_etr_buffer, + .set_buffer = tmc_set_etr_buffer, + .reset_buffer = tmc_reset_etr_buffer, + .update_buffer = tmc_update_etr_buffer, }; const struct coresight_ops tmc_etr_cs_ops = { From 2eb654b5c6ed9816af6cb8bc2eb2f29318387a95 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 20 Oct 2015 16:18:53 +0000 Subject: [PATCH 07/23] perf tools: making coresight PMU listable Adding the required mechanic allowing 'perf list pmu' to discover coresight ETM/PTM tracers. Signed-off-by: Mathieu Poirier --- MAINTAINERS | 1 + tools/perf/MANIFEST | 1 + tools/perf/arch/arm/util/Build | 2 ++ tools/perf/arch/arm/util/pmu.c | 34 ++++++++++++++++++++++++++++++++++ tools/perf/config/Makefile | 11 ++++++++--- 5 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 tools/perf/arch/arm/util/pmu.c diff --git a/MAINTAINERS b/MAINTAINERS index 6b4f6ac66326..cb53efc3760e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1007,6 +1007,7 @@ F: drivers/hwtracing/coresight/* F: Documentation/trace/coresight.txt F: Documentation/devicetree/bindings/arm/coresight.txt F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* +F: tools/perf/arch/arm/util/pmu.c ARM/CORGI MACHINE SUPPORT M: Richard Purdie diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 39c38cb45b00..eeb21eb43898 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -57,6 +57,7 @@ include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h +include/linux/coresight-pmu.h include/linux/perf_event.h include/linux/list.h include/linux/hash.h diff --git 
a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index d22e3d07de3d..66ab0b05549c 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -2,3 +2,5 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o + +libperf-$(CONFIG_AUXTRACE) += pmu.o diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c new file mode 100644 index 000000000000..af9fb666b44f --- /dev/null +++ b/tools/perf/arch/arm/util/pmu.c @@ -0,0 +1,34 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include +#include +#include + +#include "../../util/pmu.h" + +struct perf_event_attr +*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +{ +#ifdef HAVE_AUXTRACE_SUPPORT + if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { + /* add ETM default config here */ + pmu->selectable = true; + } +#endif + return NULL; +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index de89ec574361..86fc47751ca4 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -647,9 +647,14 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - ifeq ($(feature-get_cpuid), 0) - msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); - NO_AUXTRACE := 1 + ifeq ($(ARCH),x86) + ifeq ($(feature-get_cpuid), 0) + msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); + NO_AUXTRACE := 1 + else + $(call detected,CONFIG_AUXTRACE) + CFLAGS += -DHAVE_AUXTRACE_SUPPORT + endif else $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT From 65ad7b198dccae96d776db85d1a184df214dddb4 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 11 Sep 2015 20:43:39 +0000 Subject: [PATCH 08/23] perf tools: adding coresight etm PMU record capabilities Coresight ETMs are IP blocks used to perform HW assisted tracing on a CPU core. This patch introduce the required auxiliary API functions allowing the perf core to interact with a tracer. 
Signed-off-by: Mathieu Poirier --- MAINTAINERS | 3 + tools/perf/arch/arm/util/Build | 2 +- tools/perf/arch/arm/util/auxtrace.c | 54 +++ tools/perf/arch/arm/util/cs-etm.c | 563 ++++++++++++++++++++++++++++ tools/perf/arch/arm/util/cs-etm.h | 23 ++ tools/perf/arch/arm64/util/Build | 4 + tools/perf/util/auxtrace.c | 1 + tools/perf/util/auxtrace.h | 1 + tools/perf/util/cs-etm.h | 74 ++++ 9 files changed, 724 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/arm/util/auxtrace.c create mode 100644 tools/perf/arch/arm/util/cs-etm.c create mode 100644 tools/perf/arch/arm/util/cs-etm.h create mode 100644 tools/perf/util/cs-etm.h diff --git a/MAINTAINERS b/MAINTAINERS index cb53efc3760e..6c94a8f3ca45 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1008,6 +1008,9 @@ F: Documentation/trace/coresight.txt F: Documentation/devicetree/bindings/arm/coresight.txt F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* F: tools/perf/arch/arm/util/pmu.c +F: tools/perf/arch/arm/util/auxtrace.c +F: tools/perf/arch/arm/util/cs-etm.c +F: tools/perf/arch/arm/util/cs-etm.h ARM/CORGI MACHINE SUPPORT M: Richard Purdie diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index 66ab0b05549c..71de3fc40502 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -3,4 +3,4 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += pmu.o +libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c new file mode 100644 index 000000000000..95c38b683159 --- /dev/null +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -0,0 +1,54 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. 
+ * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include + +#include "../../util/auxtrace.h" +#include "../../util/evlist.h" +#include "../../util/pmu.h" +#include "cs-etm.h" + +struct auxtrace_record +*auxtrace_record__init(struct perf_evlist *evlist, int *err) +{ + struct perf_pmu *cs_etm_pmu; + struct perf_evsel *evsel; + bool found_etm = false; + + cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); + + if (evlist) { + evlist__for_each(evlist, evsel) { + if (cs_etm_pmu && + evsel->attr.type == cs_etm_pmu->type) + found_etm = true; + } + } + + if (found_etm) + return cs_etm_record_init(err); + + /* + * Clear 'err' even if we haven't found a cs_etm event - that way perf + * record can still be used even if tracers aren't present. The NULL + * return value will take care of telling the infrastructure HW tracing + * isn't available. + */ + *err = 0; + return NULL; +} diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c new file mode 100644 index 000000000000..13a2188dc5dc --- /dev/null +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -0,0 +1,563 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "cs-etm.h" +#include "../../perf.h" +#include "../../util/auxtrace.h" +#include "../../util/cpumap.h" +#include "../../util/evlist.h" +#include "../../util/pmu.h" +#include "../../util/thread_map.h" +#include "../../util/cs-etm.h" + +#include + +struct cs_etm_recording { + struct auxtrace_record itr; + struct perf_pmu *cs_etm_pmu; + struct perf_evlist *evlist; + bool snapshot_mode; + size_t snapshot_size; +}; + +static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); + +static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + ptr->snapshot_size = snapshot_size; + + return 0; +} + +static int cs_etm_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + struct perf_evsel *evsel, *cs_etm_evsel = NULL; + const struct cpu_map *cpus = evlist->cpus; + bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0); + + ptr->evlist = evlist; + ptr->snapshot_mode = opts->auxtrace_snapshot_mode; + + evlist__for_each(evlist, evsel) { 
+ if (evsel->attr.type == cs_etm_pmu->type) { + if (cs_etm_evsel) { + pr_err("There may be only one %s event\n", + CORESIGHT_ETM_PMU_NAME); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + cs_etm_evsel = evsel; + opts->full_auxtrace = true; + } + } + + /* no need to continue if no event of interest was found */ + if (!cs_etm_evsel) + return 0; + + if (opts->use_clockid) { + pr_err("Cannot use clockid (-k option) with %s\n", + CORESIGHT_ETM_PMU_NAME); + return -EINVAL; + } + + /* we are in snapshot mode */ + if (opts->auxtrace_snapshot_mode) { + /* + * No size was given to '-S' or '-m,', so go with + * the default + */ + if (!opts->auxtrace_snapshot_size && + !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = + KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && + opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + + /* + * '-m,xyz' was specified but no snapshot size, so make the + * snapshot size as big as the auxtrace mmap area. + */ + if (!opts->auxtrace_snapshot_size) { + opts->auxtrace_snapshot_size = + opts->auxtrace_mmap_pages * (size_t)page_size; + } + + /* + * -Sxyz was specified but no auxtrace mmap area, so make the + * auxtrace mmap area big enough to fit the requested snapshot + * size.
+ */ + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } + + /* Snapshot size can't be bigger than the auxtrace area */ + if (opts->auxtrace_snapshot_size > + opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + + /* Something went wrong somewhere - this shouldn't happen */ + if (!opts->auxtrace_snapshot_size || + !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + } + + /* We are in full trace mode but '-m,xyz' wasn't specified */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + + } + + /* Validate auxtrace_mmap_pages provided by user */ + if (opts->auxtrace_mmap_pages) { + unsigned int max_page = (KiB(128) / page_size); + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + + if (!privileged && + opts->auxtrace_mmap_pages > max_page) { + opts->auxtrace_mmap_pages = max_page; + pr_err("auxtrace too big, truncating to %d\n", + max_page); + } + + if (!is_power_of_2(sz)) { + pr_err("Invalid mmap size for %s: must be a power of 2\n", + CORESIGHT_ETM_PMU_NAME); + return -EINVAL; + } + } + + if (opts->auxtrace_snapshot_mode) + pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, + opts->auxtrace_snapshot_size); + + if (cs_etm_evsel) { + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace + * event must come first.
+ */ + perf_evlist__to_front(evlist, cs_etm_evsel); + /* + * In the case of per-cpu mmaps, we need the CPU on the + * AUX event. + */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(cs_etm_evsel, CPU); + } + + /* Add dummy event to keep tracking */ + if (opts->full_auxtrace) { + struct perf_evsel *tracking_evsel; + int err; + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + + /* In per-cpu case, always need the time of mmap events etc */ + if (!cpu_map__empty(cpus)) + perf_evsel__set_sample_bit(tracking_evsel, TIME); + } + + return 0; +} + +static u64 cs_etm_get_config(struct auxtrace_record *itr) +{ + u64 config = 0; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + struct perf_evlist *evlist = ptr->evlist; + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == cs_etm_pmu->type) { + /* + * Variable perf_event_attr::config is assigned to + * ETMv3/PTM. The bit fields have been made to match + * the ETMv3.5 ETMCR register specification. See the + * PMU_FORMAT_ATTR() declarations in + * drivers/hwtracing/coresight/coresight-etm-perf.c for + * details.
+ */ + config = evsel->attr.config; + break; + } + } + + return config; +} + +static size_t +cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + int i; + int etmv3 = 0, etmv4 = 0; + const struct cpu_map *cpus = evlist->cpus; + + /* cpu map is not empty, we have specific CPUs to work with */ + if (!cpu_map__empty(cpus)) { + for (i = 0; i < cpu_map__nr(cpus); i++) { + if (cs_etm_is_etmv4(itr, cpus->map[i])) + etmv4++; + else + etmv3++; + } + } else { + /* get configuration for all CPUs in the system */ + for (i = 0; i < cpu__max_cpu(); i++) { + if (cs_etm_is_etmv4(itr, i)) + etmv4++; + else + etmv3++; + } + } + + return (CS_ETM_HEADER_SIZE + + (etmv4 * CS_ETMV4_PRIV_SIZE) + + (etmv3 * CS_ETMV3_PRIV_SIZE)); +} + +static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { + [CS_ETM_ETMCCER] = "mgmt/etmccer", + [CS_ETM_ETMIDR] = "mgmt/etmidr", +}; + +static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { + [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", + [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", + [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", + [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", + [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", +}; + +static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu) +{ + bool ret = false; + char path[PATH_MAX]; + int scan; + unsigned int val; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* Take any of the RO files for ETMv4 and see if it is present */ + snprintf(path, PATH_MAX, "cpu%d/%s", + cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + scan = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); + + /* The file was read successfully, we have a winner */ + if (scan == 1) + ret = true; + + return ret; +} + +static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path) +{ + char pmu_path[PATH_MAX]; + int scan; + unsigned int val = 0; + + /* Get RO metadata from sysfs */ +
snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path); + + scan = perf_pmu__scan_file(pmu, pmu_path, "%x", &val); + if (scan != 1) + pr_err("%s: error reading: %s\n", __func__, pmu_path); + + return val; +} + +static void cs_etm_get_metadata(int cpu, u32 *offset, + struct auxtrace_record *itr, + struct auxtrace_info_event *info) +{ + u32 increment; + u64 magic; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* first see what kind of tracer this cpu is affined to */ + if (cs_etm_is_etmv4(itr, cpu)) { + magic = __perf_cs_etmv4_magic; + /* Get trace configuration register */ + info->priv[*offset + CS_ETMV4_TRCCONFIGR] = + cs_etm_get_config(itr); + /* Get traceID from the framework */ + info->priv[*offset + CS_ETMV4_TRCTRACEIDR] = + coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + info->priv[*offset + CS_ETMV4_TRCIDR0] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + info->priv[*offset + CS_ETMV4_TRCIDR1] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); + info->priv[*offset + CS_ETMV4_TRCIDR2] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); + info->priv[*offset + CS_ETMV4_TRCIDR8] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); + info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro + [CS_ETMV4_TRCAUTHSTATUS]); + + /* How much space was used */ + increment = CS_ETMV4_PRIV_MAX; + } else { + magic = __perf_cs_etmv3_magic; + /* Get configuration register */ + info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); + /* Get traceID from the framework */ + info->priv[*offset + CS_ETM_ETMTRACEIDR] = + coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + info->priv[*offset + CS_ETM_ETMCCER] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv3_ro[CS_ETM_ETMCCER]); + info->priv[*offset + 
CS_ETM_ETMIDR] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv3_ro[CS_ETM_ETMIDR]); + + /* How much space was used */ + increment = CS_ETM_PRIV_MAX; + } + + /* Build generic header portion */ + info->priv[*offset + CS_ETM_MAGIC] = magic; + info->priv[*offset + CS_ETM_CPU] = cpu; + /* Where the next CPU entry should start from */ + *offset += increment; +} + +static int cs_etm_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *info, + size_t priv_size) +{ + int i; + u32 offset; + u64 nr_cpu, type; + const struct cpu_map *cpus = session->evlist->cpus; + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + if (priv_size != cs_etm_info_priv_size(itr, session->evlist)) + return -EINVAL; + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + /* If the cpu_map is empty all CPUs are involved */ + nr_cpu = cpu_map__empty(cpus) ? cpu__max_cpu() : cpu_map__nr(cpus); + /* Get PMU type as dynamically assigned by the core */ + type = cs_etm_pmu->type; + + /* First fill out the session header */ + info->type = PERF_AUXTRACE_CS_ETM; + info->priv[CS_HEADER_VERSION_0] = 0; + info->priv[CS_PMU_TYPE_CPUS] = type << 32; + info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; + info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode; + + offset = CS_ETM_SNAPSHOT + 1; + + /* cpu map is not empty, we have specific CPUs to work with */ + if (!cpu_map__empty(cpus)) { + for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++) + cs_etm_get_metadata(cpus->map[i], &offset, itr, info); + } else { + /* get configuration for all CPUs in the system */ + for (i = 0; i < cpu__max_cpu(); i++) + cs_etm_get_metadata(i, &offset, itr, info); + } + + return 0; +} + +static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused, + int idx, struct auxtrace_mmap *mm, + unsigned char *data __maybe_unused, + u64 *head, u64 *old) +{ + pr_debug3("%s: mmap index %d old head %zu new 
head %zu size %zu\n", + __func__, idx, (size_t)*old, (size_t)*head, mm->len); + + *old = *head; + *head += mm->len; + + return 0; +} + +static int cs_etm_snapshot_start(struct auxtrace_record *itr) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->cs_etm_pmu->type) + return perf_evsel__disable(evsel); + } + return -EINVAL; +} + +static int cs_etm_snapshot_finish(struct auxtrace_record *itr) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + int nthreads = thread_map__nr(evsel->threads); + int ncpus = cpu_map__nr(evsel->cpus); + + if (evsel->attr.type == ptr->cs_etm_pmu->type) { + return perf_evsel__enable(evsel, ncpus, nthreads); + } + } + return -EINVAL; +} + +static u64 cs_etm_reference(struct auxtrace_record *itr __maybe_unused) +{ + return (((u64) rand() << 0) & 0x00000000FFFFFFFFull) | + (((u64) rand() << 32) & 0xFFFFFFFF00000000ull); +} + +static void cs_etm_recording_free(struct auxtrace_record *itr) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + free(ptr); +} + +static int cs_etm_read_finish(struct auxtrace_record *itr, int idx) +{ + struct cs_etm_recording *ptr = + container_of(itr, struct cs_etm_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each(ptr->evlist, evsel) { + if (evsel->attr.type == ptr->cs_etm_pmu->type) + return perf_evlist__enable_event_idx(ptr->evlist, + evsel, idx); + } + + return -EINVAL; +} + +struct auxtrace_record *cs_etm_record_init(int *err) +{ + struct perf_pmu *cs_etm_pmu; + struct cs_etm_recording *ptr; + + cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); + + if (!cs_etm_pmu) { + *err = -EINVAL; + goto out; + } + + ptr = zalloc(sizeof(struct cs_etm_recording)); + if (!ptr) { + *err = -ENOMEM; + goto out; + } + + ptr->cs_etm_pmu 
= cs_etm_pmu; + ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options; + ptr->itr.recording_options = cs_etm_recording_options; + ptr->itr.info_priv_size = cs_etm_info_priv_size; + ptr->itr.info_fill = cs_etm_info_fill; + ptr->itr.find_snapshot = cs_etm_find_snapshot; + ptr->itr.snapshot_start = cs_etm_snapshot_start; + ptr->itr.snapshot_finish = cs_etm_snapshot_finish; + ptr->itr.reference = cs_etm_reference; + ptr->itr.free = cs_etm_recording_free; + ptr->itr.read_finish = cs_etm_read_finish; + + *err = 0; + return &ptr->itr; +out: + return NULL; +} diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h new file mode 100644 index 000000000000..909f486d02d1 --- /dev/null +++ b/tools/perf/arch/arm/util/cs-etm.h @@ -0,0 +1,23 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef INCLUDE__PERF_CS_ETM_H__ +#define INCLUDE__PERF_CS_ETM_H__ + +struct auxtrace_record *cs_etm_record_init(int *err); + +#endif diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index e58123a8912b..f92918154fec 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,2 +1,6 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o + +libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ + ../../arm/util/auxtrace.o \ + ../../arm/util/cs-etm.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index cc1c9ce5cc56..a6f291dbc4d9 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -892,6 +892,7 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, return intel_pt_process_auxtrace_info(event, session); case PERF_AUXTRACE_INTEL_BTS: return intel_bts_process_auxtrace_info(event, session); + case PERF_AUXTRACE_CS_ETM: case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index e5a8e2d4f2af..adb53e7bcabf 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -41,6 +41,7 @@ enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, PERF_AUXTRACE_INTEL_PT, PERF_AUXTRACE_INTEL_BTS, + PERF_AUXTRACE_CS_ETM, }; enum itrace_period_type { diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h new file mode 100644 index 000000000000..3cc6bc3263fe --- /dev/null +++ b/tools/perf/util/cs-etm.h @@ -0,0 +1,74 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Mathieu Poirier + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ +#define INCLUDE__UTIL_PERF_CS_ETM_H__ + +/* Versioning header in case things need to change in the future. That way + * decoding of old snapshots is still possible. + */ +enum { + /* Starting with 0x0 */ + CS_HEADER_VERSION_0, + /* PMU->type (32 bit), total # of CPUs (32 bit) */ + CS_PMU_TYPE_CPUS, + CS_ETM_SNAPSHOT, + CS_HEADER_VERSION_0_MAX, +}; + +/* Beginning of header common to both ETMv3 and V4 */ +enum { + CS_ETM_MAGIC, + CS_ETM_CPU, +}; + +/* ETMv3/PTM metadata */ +enum { + /* Dynamic, configurable parameters */ + CS_ETM_ETMCR = CS_ETM_CPU + 1, + CS_ETM_ETMTRACEIDR, + /* RO, taken from sysFS */ + CS_ETM_ETMCCER, + CS_ETM_ETMIDR, + CS_ETM_PRIV_MAX, +}; + +/* ETMv4 metadata */ +enum { + /* Dynamic, configurable parameters */ + CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1, + CS_ETMV4_TRCTRACEIDR, + /* RO, taken from sysFS */ + CS_ETMV4_TRCIDR0, + CS_ETMV4_TRCIDR1, + CS_ETMV4_TRCIDR2, + CS_ETMV4_TRCIDR8, + CS_ETMV4_TRCAUTHSTATUS, + CS_ETMV4_PRIV_MAX, +}; + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) + +#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) + +static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; +static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; +#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) +#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) + +#endif From 1efb79086e3298b07f8734aae7614aa25ef82040 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 31 May 2016 16:32:55 -0600 Subject: [PATCH 09/23] perf/core: adding PMU driver specific configuration It is
entirely possible that some PMUs need specific configuration that is currently not found in the perf options before a session can be setup. It is the case for the CoreSight PMU where a sink needs to be provided. That sink doesn't fall in any of the current perf options. As such this patch adds the capability to receive driver specific configuration using the existing ioctl() mechanism. Once the configuration has been pushed down the kernel PMU callbacks are used to deal with the information sent from user space. Signed-off-by: Mathieu Poirier --- include/linux/perf_event.h | 9 +++++++++ include/uapi/linux/perf_event.h | 1 + kernel/events/core.c | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f9828a48f16a..6e02e62fa1d9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -391,6 +391,14 @@ struct pmu { * Filter events for PMU-specific reasons. */ int (*filter_match) (struct perf_event *event); /* optional */ + + /* + * Initial, PMU driver specific configuration. 
+ */ + int (*get_drv_configs) (struct perf_event *event, + void __user *arg); /* optional */ + void (*free_drv_configs) (struct perf_event *event); + /* optional */ }; /** @@ -558,6 +566,7 @@ struct perf_event { struct irq_work pending; atomic_t event_limit; + struct list_head drv_configs; void (*destroy)(struct perf_event *); struct rcu_head rcu_head; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d801bb0d9f6d..cd2e88450faa 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -395,6 +395,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) +#define PERF_EVENT_IOC_SET_DRV_CONFIGS _IOW('$', 10, char *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/kernel/events/core.c b/kernel/events/core.c index cda4b292135a..59552da6d6aa 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3729,6 +3729,9 @@ static void __free_event(struct perf_event *event) if (event->destroy) event->destroy(event); + if (event->pmu->free_drv_configs) + event->pmu->free_drv_configs(event); + if (event->ctx) put_ctx(event->ctx); @@ -4281,6 +4284,8 @@ static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event); static int perf_event_set_filter(struct perf_event *event, void __user *arg); static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd); +static int perf_event_drv_configs(struct perf_event *event, + void __user *arg); static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { @@ -4337,6 +4342,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon case PERF_EVENT_IOC_SET_BPF: return perf_event_set_bpf_prog(event, arg); + case PERF_EVENT_IOC_SET_DRV_CONFIGS: + return perf_event_drv_configs(event, (void __user *)arg); + default: return -ENOTTY; } @@ 
-4369,6 +4377,7 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd, switch (_IOC_NR(cmd)) { case _IOC_NR(PERF_EVENT_IOC_SET_FILTER): case _IOC_NR(PERF_EVENT_IOC_ID): + case _IOC_NR(PERF_EVENT_IOC_SET_DRV_CONFIGS): /* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case */ if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { cmd &= ~IOCSIZE_MASK; @@ -7268,6 +7277,15 @@ void perf_bp_event(struct perf_event *bp, void *data) } #endif +static int perf_event_drv_configs(struct perf_event *event, + void __user *arg) +{ + if (!event->pmu->get_drv_configs) + return -EINVAL; + + return event->pmu->get_drv_configs(event, arg); +} + /* * hrtimer based swevent callback */ @@ -8004,6 +8022,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->sibling_list); INIT_LIST_HEAD(&event->rb_entry); INIT_LIST_HEAD(&event->active_entry); + INIT_LIST_HEAD(&event->drv_configs); INIT_HLIST_NODE(&event->hlist_entry); From e5fd3d6e84b268edb1a3f66e4d974611e3908c00 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 28 Apr 2016 16:26:25 -0600 Subject: [PATCH 10/23] perf: passing struct perf_event to function setup_aux() Some information, like driver specific configuration, is found in the perf event structure. As such pass a 'struct perf_event' to function setup_aux() rather than just the CPU number so that individual drivers can make the right configuration when setting up a session. 
Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-etm-perf.c | 4 ++-- include/linux/perf_event.h | 2 +- kernel/events/ring_buffer.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 755125f7917f..f4174f36c5a0 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -155,7 +155,7 @@ static void etm_free_aux(void *data) schedule_work(&event_data->work); } -static void *etm_setup_aux(int event_cpu, void **pages, +static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { int cpu; @@ -163,7 +163,7 @@ static void *etm_setup_aux(int event_cpu, void **pages, struct coresight_device *sink; struct etm_event_data *event_data = NULL; - event_data = alloc_event_data(event_cpu); + event_data = alloc_event_data(event->cpu); if (!event_data) return NULL; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6e02e62fa1d9..ece8b9629a47 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -378,7 +378,7 @@ struct pmu { /* * Set up pmu-private data structures for an AUX area */ - void *(*setup_aux) (int cpu, void **pages, + void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 084be7d41bcf..8c60a4eb4080 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -562,7 +562,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, goto out; } - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) goto out; From 376daf04d54d5daa96282dced8e1df9e7b07446b Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 29 Apr 
2016 13:22:59 -0600 Subject: [PATCH 11/23] coresight: adding sink parameter to function coresight_build_path() Up to now function coresight_build_path() was counting on a sink to have been selected (from sysFS) prior to being called. This patch adds a string argument so that a sink matching the argument can be selected. Signed-off-by: Mathieu Poirier --- .../hwtracing/coresight/coresight-etm-perf.c | 2 +- drivers/hwtracing/coresight/coresight-priv.h | 3 +- drivers/hwtracing/coresight/coresight.c | 40 ++++++++++++------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index f4174f36c5a0..f8c7a8733b23 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -184,7 +184,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, * list of devices from source to sink that can be * referenced later when the path is actually needed. 
*/ - event_data->path[cpu] = coresight_build_path(csdev); + event_data->path[cpu] = coresight_build_path(csdev, NULL); if (!event_data->path[cpu]) goto err; } diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index ad975c58080d..3cb574b3cdd9 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -94,7 +94,8 @@ static inline void CS_UNLOCK(void __iomem *addr) void coresight_disable_path(struct list_head *path); int coresight_enable_path(struct list_head *path, u32 mode); struct coresight_device *coresight_get_sink(struct list_head *path); -struct list_head *coresight_build_path(struct coresight_device *csdev); +struct list_head *coresight_build_path(struct coresight_device *csdev, + const char *sink); void coresight_release_path(struct list_head *path); #ifdef CONFIG_CORESIGHT_SOURCE_ETM3X diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index d08d1ab9bba5..508532b3fcac 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -371,31 +371,42 @@ struct coresight_device *coresight_get_sink(struct list_head *path) /** * _coresight_build_path - recursively build a path from a @csdev to a sink. * @csdev: The device to start from. + * @sink: The name of the sink this path should connect with. * @path: The list to add devices to. * - * The tree of Coresight device is traversed until an activated sink is - * found. From there the sink is added to the list along with all the - * devices that led to that point - the end result is a list from source - * to sink. In that list the source is the first device and the sink the - * last one. + * The tree of Coresight device is traversed until an activated sink or + * the one specified by @sink is found. 
+ * From there the sink is added to the list along with all the devices that + * led to that point - the end result is a list from source to sink. In that + * list the source is the first device and the sink the last one. */ static int _coresight_build_path(struct coresight_device *csdev, - struct list_head *path) + const char *sink, struct list_head *path) { int i; bool found = false; struct coresight_node *node; - /* An activated sink has been found. Enqueue the element */ - if ((csdev->type == CORESIGHT_DEV_TYPE_SINK || - csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && csdev->activated) - goto out; + /* + * First see if we are dealing with a sink. If we have one check if + * it was selected via sysFS or the perf cmd line. + */ + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + /* Activated via perf cmd line */ + if (sink && !strcmp(dev_name(&csdev->dev), sink)) + goto out; + /* Activated via sysFS */ + if (csdev->activated) + goto out; + } /* Not a sink - recursively explore each port found on this element */ for (i = 0; i < csdev->nr_outport; i++) { struct coresight_device *child_dev = csdev->conns[i].child_dev; - if (child_dev && _coresight_build_path(child_dev, path) == 0) { + if (child_dev && + _coresight_build_path(child_dev, sink, path) == 0) { found = true; break; } @@ -422,7 +433,8 @@ static int _coresight_build_path(struct coresight_device *csdev, return 0; } -struct list_head *coresight_build_path(struct coresight_device *csdev) +struct list_head *coresight_build_path(struct coresight_device *csdev, + const char *sink) { struct list_head *path; int rc; @@ -433,7 +445,7 @@ struct list_head *coresight_build_path(struct coresight_device *csdev) INIT_LIST_HEAD(path); - rc = _coresight_build_path(csdev, path); + rc = _coresight_build_path(csdev, sink, path); if (rc) { kfree(path); return ERR_PTR(rc); @@ -508,7 +520,7 @@ int coresight_enable(struct coresight_device *csdev) if (csdev->enable) goto out; - path =
coresight_build_path(csdev); + path = coresight_build_path(csdev, NULL); if (IS_ERR(path)) { pr_err("building path(s) failed\n"); ret = PTR_ERR(path); From 809a241f08c44c0e31b1390f75a35c74f9d87854 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 29 Apr 2016 13:29:12 -0600 Subject: [PATCH 12/23] coresight: etm-perf: incorporating sink definition from the cmd line Now that PMU specific configuration is available as part of the event, lookup the sink identified by users from the perf command line and build a path from source to sink. With this functionality it is no longer required to select a sink in a separate step (from sysFS) before a perf trace session can be started. Signed-off-by: Mathieu Poirier --- .../hwtracing/coresight/coresight-etm-perf.c | 119 +++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index f8c7a8733b23..8fbb1dd9e243 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,17 @@ struct etm_event_data { struct list_head **path; }; +/** + * struct perf_pmu_drv_config - Driver specific configuration needed + * before a session can start. + * @sink: The name of the sink this session should use. + * @entry: Hook to the event->drv_configs list. 
+ */ +struct perf_pmu_drv_config { + char *sink; + struct list_head entry; +}; + static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); static DEFINE_PER_CPU(struct coresight_device *, csdev_src); @@ -159,9 +171,22 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { int cpu; + char *sink_def = NULL; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; + struct perf_pmu_drv_config *drv_config; + + /* + * Search the driver configurables looking for a sink. If more than + * one sink was specified the last one is taken. + */ + list_for_each_entry(drv_config, &event->drv_configs, entry) { + if (drv_config && drv_config->sink) { + sink_def = drv_config->sink; + break; + } + } event_data = alloc_event_data(event->cpu); if (!event_data) @@ -184,7 +209,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, * list of devices from source to sink that can be * referenced later when the path is actually needed. 
*/ - event_data->path[cpu] = coresight_build_path(csdev, NULL); + event_data->path[cpu] = coresight_build_path(csdev, sink_def); if (!event_data->path[cpu]) goto err; } @@ -342,6 +367,95 @@ static void etm_event_del(struct perf_event *event, int mode) etm_event_stop(event, PERF_EF_UPDATE); } +enum { + ETM_TOKEN_SINK_CPU, + ETM_TOKEN_SINK, + ETM_TOKEN_ERR, +}; + +static const match_table_t drv_cfg_tokens = { + {ETM_TOKEN_SINK_CPU, "sink=cpu%d:%s"}, + {ETM_TOKEN_SINK, "sink=%s"}, + {ETM_TOKEN_ERR, NULL}, +}; + +static int etm_get_drv_configs(struct perf_event *event, void __user *arg) +{ + char *config, *sink = NULL; + int cpu = -1, token, ret = 0; + substring_t args[MAX_OPT_ARGS]; + struct perf_pmu_drv_config *drv_config = NULL; + + /* Make user supplied input usable */ + config = strndup_user(arg, PAGE_SIZE); + if (IS_ERR(config)) + return PTR_ERR(config); + + /* See above declared @drv_cfg_tokens for the usable formats */ + token = match_token(config, drv_cfg_tokens, args); + switch (token) { + case ETM_TOKEN_SINK: + /* Just a sink has been specified */ + sink = match_strdup(&args[0]); + if (IS_ERR(sink)) { + ret = PTR_ERR(sink); + goto err; + } + break; + case ETM_TOKEN_SINK_CPU: + /* We have a sink and a CPU */ + if (match_int(&args[0], &cpu)) { + ret = -EINVAL; + goto err; + } + sink = match_strdup(&args[1]); + if (IS_ERR(sink)) { + ret = PTR_ERR(sink); + goto err; + } + break; + default: + ret = -EINVAL; + goto err; + } + + /* If the CPUs don't match the sink is destined to another path */ + if (event->cpu != cpu) + goto err; + + /* + * We have a valid configuration, allocate memory and add to the list + * of driver configurables. 
+ */ + drv_config = kzalloc(sizeof(*drv_config), GFP_KERNEL); + if (IS_ERR(drv_config)) { + ret = PTR_ERR(drv_config); + goto err; + } + + drv_config->sink = sink; + list_add(&drv_config->entry, &event->drv_configs); + +out: + kfree(config); + return ret; + +err: + kfree(sink); + goto out; +} + +static void etm_free_drv_configs(struct perf_event *event) +{ + struct perf_pmu_drv_config *config, *itr; + + list_for_each_entry_safe(config, itr, &event->drv_configs, entry) { + list_del(&config->entry); + kfree(config->sink); + kfree(config); + } +} + int etm_perf_symlink(struct coresight_device *csdev, bool link) { char entry[sizeof("cpu9999999")]; @@ -383,6 +497,9 @@ static int __init etm_perf_init(void) etm_pmu.stop = etm_event_stop; etm_pmu.add = etm_event_add; etm_pmu.del = etm_event_del; + etm_pmu.get_drv_configs = etm_get_drv_configs; + etm_pmu.free_drv_configs + = etm_free_drv_configs; ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1); if (ret == 0) From 6a453d5d51ffc238d32d491c22b75fc9540b491c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 29 Apr 2016 21:21:11 +0000 Subject: [PATCH 13/23] perf tools: add infrastructure for PMU specific configuration This patchset adds PMU driver specific configuration to the parser infrastructure by preceding any term with the '@' letter. As such doing something like: perf -e some_event/@drv1,@drv2=drv_config/ ... will see 'drv1' and 'drv2=config' being added to the list of evsel config terms. Token 'drv1' and 'drv2=config' are not processed in user space and are meant to be interpreted by the PMU driver. First the lexer/parser are supplemented with the required definitions to recognise the driver specific configuration. From there they are simply added to the list of event terms. The bulk of the work is done in function "parse_events_add_pmu()" where driver config event terms are added to a new list of driver config terms, which in turn spliced with the event's new driver configuration list. 
Signed-off-by: Mathieu Poirier --- tools/perf/util/evsel.c | 1 + tools/perf/util/evsel.h | 4 ++ tools/perf/util/parse-events.c | 67 +++++++++++++++++++++++++--------- tools/perf/util/parse-events.h | 3 +- tools/perf/util/parse-events.l | 12 ++++++ tools/perf/util/parse-events.y | 11 ++++++ 6 files changed, 80 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1eb4d02e3968..47f871c3c1c7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -211,6 +211,7 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->node); INIT_LIST_HEAD(&evsel->config_terms); + INIT_LIST_HEAD(&evsel->drv_config_terms); perf_evsel__object.init(evsel); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); perf_evsel__calc_id_pos(evsel); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index eaa4c733c976..ac94478feaab 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -44,6 +44,7 @@ enum { PERF_EVSEL__CONFIG_TERM_CALLGRAPH, PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -55,6 +56,7 @@ struct perf_evsel_config_term { u64 freq; bool time; char *callgraph; + char *drv_cfg; u64 stack_user; bool inherit; } val; @@ -75,6 +77,7 @@ struct perf_evsel_config_term { * PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. 
if sample_id_all * is used there is an id sample appended to non-sample events * @priv: And what is in its containing unnamed union are tool specific + * @drv_config_terms: List of configurables sent directly to the PMU driver */ struct perf_evsel { struct list_head node; @@ -123,6 +126,7 @@ struct perf_evsel { char *group_name; bool cmdline_group_boundary; struct list_head config_terms; + struct list_head drv_config_terms; int bpf_fd; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b48e87693aa5..ec4ba419501a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -285,7 +285,8 @@ static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct cpu_map *cpus, - struct list_head *config_terms) + struct list_head *config_terms, + struct list_head *drv_config_terms) { struct perf_evsel *evsel; @@ -304,6 +305,9 @@ __add_event(struct list_head *list, int *idx, if (config_terms) list_splice(config_terms, &evsel->config_terms); + if (drv_config_terms) + list_splice(drv_config_terms, &evsel->drv_config_terms); + list_add_tail(&evsel->node, list); return evsel; } @@ -312,7 +316,8 @@ static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct list_head *config_terms) { - return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, name, + NULL, config_terms, NULL) ? 
0 : -ENOMEM; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -823,7 +828,8 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_error *err) { - if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || + term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG) /* * Always succeed for sysfs terms, as we dont know * at this point what type they need to have. @@ -869,10 +875,7 @@ static int config_attr(struct perf_event_attr *attr, return 0; } -static int get_config_terms(struct list_head *head_config, - struct list_head *head_terms __maybe_unused) -{ -#define ADD_CONFIG_TERM(__type, __name, __val) \ +#define ADD_CONFIG_TERM(__type, __name, __val, __head_terms) \ do { \ struct perf_evsel_config_term *__t; \ \ @@ -883,33 +886,43 @@ do { \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ __t->val.__name = __val; \ - list_add_tail(&__t->list, head_terms); \ + list_add_tail(&__t->list, __head_terms); \ } while (0) +static int get_config_terms(struct list_head *head_config, + struct list_head *head_terms __maybe_unused) +{ struct parse_events_term *term; list_for_each_entry(term, head_config, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: - ADD_CONFIG_TERM(PERIOD, period, term->val.num); + ADD_CONFIG_TERM(PERIOD, period, + term->val.num, head_terms); break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: - ADD_CONFIG_TERM(FREQ, freq, term->val.num); + ADD_CONFIG_TERM(FREQ, freq, + term->val.num, head_terms); break; case PARSE_EVENTS__TERM_TYPE_TIME: - ADD_CONFIG_TERM(TIME, time, term->val.num); + ADD_CONFIG_TERM(TIME, time, + term->val.num, head_terms); break; case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: - ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); + ADD_CONFIG_TERM(CALLGRAPH, callgraph, + term->val.str, head_terms); break; case 
PARSE_EVENTS__TERM_TYPE_STACKSIZE: - ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); + ADD_CONFIG_TERM(STACK_USER, stack_user, + term->val.num, head_terms); break; case PARSE_EVENTS__TERM_TYPE_INHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0); + ADD_CONFIG_TERM(INHERIT, inherit, + term->val.num ? 1 : 0, head_terms); break; case PARSE_EVENTS__TERM_TYPE_NOINHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); + ADD_CONFIG_TERM(INHERIT, inherit, + term->val.num ? 0 : 1, head_terms); break; default: break; @@ -919,6 +932,21 @@ do { \ return 0; } +static int get_drv_config_terms(struct list_head *head_config, + struct list_head *head_terms) +{ + struct parse_events_term *term; + + list_for_each_entry(term, head_config, list) { + if (term->type_term != PARSE_EVENTS__TERM_TYPE_DRV_CFG) + continue; + + ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str, head_terms); + } + + return 0; +} + int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event, struct parse_events_error *err, @@ -989,6 +1017,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, struct perf_pmu *pmu; struct perf_evsel *evsel; LIST_HEAD(config_terms); + LIST_HEAD(drv_config_terms); pmu = perf_pmu__find(name); if (!pmu) @@ -1003,7 +1032,8 @@ int parse_events_add_pmu(struct parse_events_evlist *data, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL); + evsel = __add_event(list, &data->idx, &attr, + NULL, pmu->cpus, NULL, NULL); return evsel ? 
0 : -ENOMEM; } @@ -1020,12 +1050,15 @@ int parse_events_add_pmu(struct parse_events_evlist *data, if (get_config_terms(head_config, &config_terms)) return -ENOMEM; + if (get_drv_config_terms(head_config, &drv_config_terms)) + return -ENOMEM; + if (perf_pmu__config(pmu, &attr, head_config, data->error)) return -EINVAL; evsel = __add_event(list, &data->idx, &attr, pmu_event_name(head_config), pmu->cpus, - &config_terms); + &config_terms, &drv_config_terms); if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f1a6db107241..09c3ee2df45c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -68,7 +68,8 @@ enum { PARSE_EVENTS__TERM_TYPE_CALLGRAPH, PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, - PARSE_EVENTS__TERM_TYPE_INHERIT + PARSE_EVENTS__TERM_TYPE_INHERIT, + PARSE_EVENTS__TERM_TYPE_DRV_CFG, }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 58c5831ffd5c..de260ed0dd54 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -53,6 +53,16 @@ static int str(yyscan_t scanner, int token) return token; } +static int drv_str(yyscan_t scanner, int token) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + /* Strip off the '@' */ + yylval->str = strdup(text + 1); + return token; +} + #define REWIND(__alloc) \ do { \ YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \ @@ -123,6 +133,7 @@ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* +drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 
/* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} @@ -196,6 +207,7 @@ no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } +@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); } } { diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index ad379968d4c1..d35c10275ba4 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list, %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%token PE_DRV_CFG_TERM %type PE_VALUE %type PE_VALUE_SYM_HW %type PE_VALUE_SYM_SW @@ -62,6 +63,7 @@ static inc_group_count(struct list_head *list, %type PE_MODIFIER_BP %type PE_EVENT_NAME %type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%type PE_DRV_CFG_TERM %type value_sym %type event_config %type event_term @@ -573,6 +575,15 @@ PE_TERM ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL)); $$ = term; } +| +PE_DRV_CFG_TERM +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, + $1, $1, &@1, NULL)); + $$ = term; +} sep_dc: ':' | From 55ab3786296f490cc921a1acc244dc6c22c37201 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 29 Apr 2016 22:04:48 +0000 Subject: [PATCH 14/23] perf tools: pushing driver configuration down to the kernel Now that PMU specific driver configuration are queued in evsel::drv_config_terms, all we need to do is re-use the current ioctl() mechanism to push down the information to the kernel driver. 
Signed-off-by: Mathieu Poirier --- tools/perf/builtin-record.c | 9 +++++++++ tools/perf/util/evlist.c | 24 ++++++++++++++++++++++++ tools/perf/util/evlist.h | 3 +++ tools/perf/util/evsel.c | 32 ++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 3 +++ 5 files changed, 71 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 199fc31e3919..1b9decd5fbf1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -276,6 +276,7 @@ static int record__open(struct record *rec) struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct record_opts *opts = &rec->opts; + struct perf_evsel_config_term *err_term; int rc = 0; perf_evlist__config(evlist, opts); @@ -305,6 +306,14 @@ static int record__open(struct record *rec) goto out; } + if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { + error("failed to set config \"%s\" on event %s with %d (%s)\n", + err_term->val.drv_cfg, perf_evsel__name(pos), errno, + strerror_r(errno, msg, sizeof(msg))); + rc = -1; + goto out; + } + if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode) < 0) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5da08efda32f..b54ee9f1cffd 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1247,6 +1247,30 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e return err; } +int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, + struct perf_evsel **err_evsel, + struct perf_evsel_config_term **err_term) +{ + struct perf_evsel *evsel; + int err = 0; + const int ncpus = cpu_map__nr(evlist->cpus), + nthreads = thread_map__nr(evlist->threads); + + evlist__for_each(evlist, evsel) { + if (list_empty(&evsel->drv_config_terms)) + continue; + + err = perf_evsel__apply_drv_configs(evsel, ncpus, + nthreads, err_term); + if (err) { + *err_evsel = evsel; + break; + } + } + + 
return err; +} + int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) { struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a459fe71b452..ae5c1eb1d08c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -163,6 +163,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); +int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, + struct perf_evsel **err_evsel, + struct perf_evsel_config_term **term); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 47f871c3c1c7..39a8bd842d0d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -982,6 +982,27 @@ int perf_evsel__append_filter(struct perf_evsel *evsel, return -1; } +int perf_evsel__apply_drv_configs(struct perf_evsel *evsel, + int ncpus, int nthreads, + struct perf_evsel_config_term **err_term) +{ + int err = 0; + struct perf_evsel_config_term *term; + + list_for_each_entry(term, &evsel->drv_config_terms, list) { + err = perf_evsel__run_ioctl(evsel, ncpus, nthreads, + PERF_EVENT_IOC_SET_DRV_CONFIGS, + (void *)term->val.drv_cfg); + + if (err) { + *err_term = term; + break; + } + } + + return err; +} + int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) { return perf_evsel__run_ioctl(evsel, ncpus, nthreads, @@ -1044,6 +1065,16 @@ static void perf_evsel__free_config_terms(struct perf_evsel *evsel) } } +static void perf_evsel__free_drv_config_terms(struct perf_evsel *evsel) +{ + struct perf_evsel_config_term *term, *h; + + list_for_each_entry_safe(term, h, &evsel->drv_config_terms, list) { + list_del(&term->list); + free(term); + } +} + 
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { int cpu, thread; @@ -1065,6 +1096,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); perf_evsel__free_config_terms(evsel); + perf_evsel__free_drv_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); cpu_map__put(evsel->own_cpus); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ac94478feaab..b649143ac16b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -231,6 +231,9 @@ int perf_evsel__append_filter(struct perf_evsel *evsel, const char *op, const char *filter); int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); +int perf_evsel__apply_drv_configs(struct perf_evsel *evsel, + int ncpus, int nthreads, + struct perf_evsel_config_term **err_term); int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__disable(struct perf_evsel *evsel); From 56139d02e29f968d349571850e832da942ff516b Mon Sep 17 00:00:00 2001 From: tor-jeremiassen Date: Wed, 17 Feb 2016 08:29:21 -0600 Subject: [PATCH 15/23] perf symbols: Check before overwriting build_id Added check to see if has_build_id is set before overwriting build_id. 
Signed-off-by: Tor Jeremiassen --- tools/perf/util/symbol-minimal.c | 2 +- tools/perf/util/symbol.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 48906333a858..9be16712ce74 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -344,7 +344,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, if (ret >= 0) dso->is_64_bit = ret; - if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { + if ((!dso->has_build_id) && (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0)) { dso__set_build_id(dso, build_id); } return 0; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cd08027a6d2c..1d0d8bff4a5b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1465,7 +1465,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0) + if ((!dso->has_build_id) && + (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0)) dso__set_build_id(dso, build_id); /* From 1265cc36fba27539b82603a6950488ee1c8da70e Mon Sep 17 00:00:00 2001 From: tor-jeremiassen Date: Tue, 9 Feb 2016 10:34:51 -0600 Subject: [PATCH 16/23] perf tools: decoding capability for CoreSight traces Added user space perf functionality for CoreSight trace decoding.
--- tools/perf/Makefile.perf | 3 + tools/perf/builtin-script.c | 3 +- tools/perf/config/Makefile | 18 + tools/perf/util/Build | 2 + tools/perf/util/auxtrace.c | 2 + tools/perf/util/build-id.c | 2 +- tools/perf/util/build-id.h | 1 + tools/perf/util/cs-etm-decoder/Build | 7 + .../util/cs-etm-decoder/cs-etm-decoder-stub.c | 91 + .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 488 ++++++ .../perf/util/cs-etm-decoder/cs-etm-decoder.h | 117 ++ tools/perf/util/cs-etm.c | 1472 +++++++++++++++++ tools/perf/util/cs-etm.h | 3 + tools/perf/util/machine.c | 35 +- .../scripting-engines/trace-event-python.c | 2 + 15 files changed, 2237 insertions(+), 9 deletions(-) create mode 100644 tools/perf/util/cs-etm-decoder/Build create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-decoder-stub.c create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h create mode 100644 tools/perf/util/cs-etm.c diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0d19d5447d6c..34846e71fdbd 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -77,6 +77,9 @@ include config/utilities.mak # Define NO_AUXTRACE if you do not want AUX area tracing support # # Define NO_LIBBPF if you do not want BPF support +# +# Define NO_CSTRACE if you do not want CoreSight trace decoding support +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 72b5deb4bd79..368d1e1561f7 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -92,7 +92,8 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | - PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_ADDR | + PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 
86fc47751ca4..033d94114bab 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -433,6 +433,24 @@ endif grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) +ifdef CSTRACE_PATH + ifeq (${IS_64_BIT}, 1) + CSTRACE_LNX = linux64 + else + CSTRACE_LNX = linux + endif + ifdef DEBUG + LIBCSTRACE = -lcstraced_c_api -lcstraced + CSTRACE_LIB_PATH = $(CSTRACE_PATH)/lib/$(CSTRACE_LNX)/dbg + else + LIBCSTRACE = -lcstrace_c_api -lcstrace + CSTRACE_LIB_PATH = $(CSTRACE_PATH)/lib/$(CSTRACE_LNX)/rel + endif + $(call detected,CSTRACE) + $(call detected_var,CSTRACE_PATH) + EXTLIBS += -L$(CSTRACE_LIB_PATH) $(LIBCSTRACE) -lstdc++ +endif + ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL else diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 591b3fe3ed49..a8d806503a45 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -84,6 +84,8 @@ libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o +libperf-$(CONFIG_AUXTRACE) += cs-etm.o +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ libperf-y += parse-branch-options.o libperf-y += parse-regs-options.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a6f291dbc4d9..67551225764e 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -49,6 +49,7 @@ #include "intel-pt.h" #include "intel-bts.h" +#include "cs-etm.h" int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, @@ -893,6 +894,7 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, case PERF_AUXTRACE_INTEL_BTS: return intel_bts_process_auxtrace_info(event, session); case PERF_AUXTRACE_CS_ETM: + return cs_etm__process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6a7e273a514a..52d320e922e3 100644 --- 
a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -145,7 +145,7 @@ static int asnprintf(char **strp, size_t size, const char *fmt, ...) return ret; } -static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +char *build_id__filename(const char *sbuild_id, char *bf, size_t size) { char *tmp = bf; int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 27a14a8a945b..eb2c2b6e1dab 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -11,6 +11,7 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; +char *build_id__filename(const char *sbuild_id, char *bf, size_t size); int build_id__sprintf(const u8 *build_id, int len, char *bf); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build new file mode 100644 index 000000000000..d4896fec940c --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/Build @@ -0,0 +1,7 @@ +ifeq ($(CSTRACE_PATH),) +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder-stub.o +else +CFLAGS_cs-etm-decoder.o += -I$(CSTRACE_PATH)/include +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o +endif + diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder-stub.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder-stub.c new file mode 100644 index 000000000000..38f2b756fe10 --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder-stub.c @@ -0,0 +1,91 @@ +/* + * + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Tor Jeremiassen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include + +#include "cs-etm-decoder.h" +#include "../util.h" + + +struct cs_etm_decoder +{ + void *state; + int dummy; +}; + +int cs_etm_decoder__flush(struct cs_etm_decoder *decoder) +{ + (void) decoder; + return -1; +} + +int cs_etm_decoder__add_bin_file(struct cs_etm_decoder *decoder, uint64_t offset, uint64_t address, uint64_t len, const char *fname) +{ + (void) decoder; + (void) offset; + (void) address; + (void) len; + (void) fname; + return -1; +} + +const struct cs_etm_state *cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + uint64_t indx, + const uint8_t *buf, + size_t len, + size_t *consumed) +{ + (void) decoder; + (void) indx; + (void) buf; + (void) len; + (void) consumed; + return NULL; +} + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, uint64_t address, uint64_t len, cs_etm_mem_cb_type cb_func) +{ + (void) decoder; + (void) address; + (void) len; + (void) cb_func; + return -1; +} + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet) +{ + (void) decoder; + (void) packet; + return -1; +} + +struct cs_etm_decoder *cs_etm_decoder__new(uint32_t num_cpu, struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params t_params[]) +{ + (void) num_cpu; + (void) d_params; + (void) t_params; + return NULL; +} + + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder) +{ + (void) decoder; + return; +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c new file mode 100644 index 000000000000..2fef19922fdd --- /dev/null +++ 
b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -0,0 +1,488 @@ +/* + * + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Tor Jeremiassen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include + +#include "cs-etm-decoder.h" +#include "../util.h" + +#include "c_api/rctdl_c_api.h" +#include "rctdl_if_types.h" +#include "etmv4/trc_pkt_types_etmv4.h" + +#define MAX_BUFFER 1024 + + + +struct cs_etm_decoder +{ + struct cs_etm_state state; + dcd_tree_handle_t dcd_tree; + void (*packet_printer)(const char *); + cs_etm_mem_cb_type mem_access; + rctdl_datapath_resp_t prev_return; + size_t prev_processed; + bool trace_on; + bool discontinuity; + struct cs_etm_packet packet_buffer[MAX_BUFFER]; + uint32_t packet_count; + uint32_t head; + uint32_t tail; + uint32_t end_tail; +}; + +static uint32_t cs_etm_decoder__mem_access(const void *context, + const rctdl_vaddr_t address, + const rctdl_mem_space_acc_t mem_space, + const uint32_t req_size, + uint8_t *buffer) +{ + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + (void) mem_space; + + return decoder->mem_access(decoder->state.data,address,req_size,buffer); +} + +static int cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, + rctdl_etmv4_cfg *config) +{ + config->reg_configr = params->reg_configr; + config->reg_traceidr = params->reg_traceidr; + config->reg_idr0 = params->reg_idr0; + config->reg_idr1 = params->reg_idr1; + config->reg_idr2 = 
params->reg_idr2; + config->reg_idr8 = params->reg_idr8; + + config->reg_idr9 = 0; + config->reg_idr10 = 0; + config->reg_idr11 = 0; + config->reg_idr12 = 0; + config->reg_idr13 = 0; + config->arch_ver = ARCH_V8; + config->core_prof = profile_CortexA; + + return 0; +} + +static int cs_etm_decoder__flush_packet(struct cs_etm_decoder *decoder) +{ + int err = 0; + + if (decoder == NULL) return -1; + + if (decoder->packet_count >= 31) return -1; + + if (decoder->tail != decoder->end_tail) { + decoder->tail = (decoder->tail + 1) & (MAX_BUFFER - 1); + decoder->packet_count++; + } + + return err; +} + +int cs_etm_decoder__flush(struct cs_etm_decoder *decoder) +{ + return cs_etm_decoder__flush_packet(decoder); +} + +static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, const rctdl_generic_trace_elem *elem, enum cs_etm_sample_type sample_type) +{ + int err = 0; + uint32_t et = 0; + + if (decoder == NULL) return -1; + + if (decoder->packet_count >= 31) return -1; + + err = cs_etm_decoder__flush_packet(decoder); + + if (err) return err; + + et = decoder->end_tail; + + decoder->packet_buffer[et].sample_type = sample_type; + decoder->packet_buffer[et].start_addr = elem->st_addr; + decoder->packet_buffer[et].end_addr = elem->en_addr; + decoder->packet_buffer[et].exc = false; + decoder->packet_buffer[et].exc_ret = false; + et = (et + 1) & (MAX_BUFFER - 1); + + decoder->end_tail = et; + + return err; +} + +static int cs_etm_decoder__mark_exception(struct cs_etm_decoder *decoder) +{ + int err = 0; + + if (decoder == NULL) return -1; + + decoder->packet_buffer[decoder->end_tail].exc = true; + + return err; +} + +static int cs_etm_decoder__mark_exception_return(struct cs_etm_decoder *decoder) +{ + int err = 0; + + if (decoder == NULL) return -1; + + decoder->packet_buffer[decoder->end_tail].exc_ret = true; + + return err; +} + +static rctdl_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( + const void *context, + const rctdl_trc_index_t indx, + const uint8_t 
trace_chan_id, + const rctdl_generic_trace_elem *elem) +{ + rctdl_datapath_resp_t resp = RCTDL_RESP_CONT; + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + (void) indx; + (void) trace_chan_id; + + switch (elem->elem_type) { + case RCTDL_GEN_TRC_ELEM_UNKNOWN: + break; + case RCTDL_GEN_TRC_ELEM_NO_SYNC: + decoder->trace_on = false; + break; + case RCTDL_GEN_TRC_ELEM_TRACE_ON: + decoder->trace_on = true; + break; + //case RCTDL_GEN_TRC_ELEM_TRACE_OVERFLOW: + //decoder->trace_on = false; + //decoder->discontinuity = true; + //break; + case RCTDL_GEN_TRC_ELEM_INSTR_RANGE: + cs_etm_decoder__buffer_packet(decoder,elem, CS_ETM_RANGE); + resp = RCTDL_RESP_WAIT; + break; + case RCTDL_GEN_TRC_ELEM_EXCEPTION: + cs_etm_decoder__mark_exception(decoder); + break; + case RCTDL_GEN_TRC_ELEM_EXCEPTION_RET: + cs_etm_decoder__mark_exception_return(decoder); + break; + case RCTDL_GEN_TRC_ELEM_PE_CONTEXT: + case RCTDL_GEN_TRC_ELEM_EO_TRACE: + case RCTDL_GEN_TRC_ELEM_ADDR_NACC: + case RCTDL_GEN_TRC_ELEM_TIMESTAMP: + case RCTDL_GEN_TRC_ELEM_CYCLE_COUNT: + //case RCTDL_GEN_TRC_ELEM_TS_WITH_CC: + case RCTDL_GEN_TRC_ELEM_EVENT: + default: + break; + } + + decoder->state.err = 0; + + return resp; +} + +static rctdl_datapath_resp_t cs_etm_decoder__etmv4i_packet_printer( + const void *context, + const rctdl_datapath_op_t op, + const rctdl_trc_index_t indx, + const rctdl_etmv4_i_pkt *pkt) +{ + const size_t PACKET_STR_LEN = 1024; + rctdl_datapath_resp_t ret = RCTDL_RESP_CONT; + char packet_str[PACKET_STR_LEN]; + size_t offset; + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + sprintf(packet_str,"%ld: ", (long int) indx); + offset = strlen(packet_str); + + switch(op) { + case RCTDL_OP_DATA: + if (rctdl_pkt_str(RCTDL_PROTOCOL_ETMV4I, + (void *)pkt, + packet_str+offset, + PACKET_STR_LEN-offset) != RCTDL_OK) + ret = RCTDL_RESP_FATAL_INVALID_PARAM; + break; + case RCTDL_OP_EOT: + sprintf(packet_str,"**** END OF TRACE ****\n"); + break; + case 
RCTDL_OP_FLUSH: + case RCTDL_OP_RESET: + default: + break; + } + + decoder->packet_printer(packet_str); + + return ret; +} + +static int cs_etm_decoder__create_etmv4i_packet_printer(struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params *t_params, + + struct cs_etm_decoder *decoder) +{ + rctdl_etmv4_cfg trace_config; + int ret = 0; + + if (d_params->packet_printer == NULL) + return -1; + + ret = cs_etm_decoder__gen_etmv4_config(t_params,&trace_config); + + if (ret != 0) + return -1; + + decoder->packet_printer = d_params->packet_printer; + + ret = rctdl_dt_create_etmv4i_pkt_proc(decoder->dcd_tree, + &trace_config, + cs_etm_decoder__etmv4i_packet_printer, + decoder); + + return ret; +} + +static int cs_etm_decoder__create_etmv4i_packet_decoder(struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + rctdl_etmv4_cfg trace_config; + int ret = 0; + decoder->packet_printer = d_params->packet_printer; + + ret = cs_etm_decoder__gen_etmv4_config(t_params,&trace_config); + + if (ret != 0) + return -1; + + ret = rctdl_dt_create_etmv4i_decoder(decoder->dcd_tree,&trace_config); + + if (ret != RCTDL_OK) + return -1; + + ret = rctdl_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, decoder); + return ret; +} + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, uint64_t address, uint64_t len, cs_etm_mem_cb_type cb_func) +{ + int err; + + decoder->mem_access = cb_func; + err = rctdl_dt_add_callback_mem_acc(decoder->dcd_tree, + address, + address+len-1, + RCTDL_MEM_SPACE_ANY, + cs_etm_decoder__mem_access, + decoder); + return err; +} + + +int cs_etm_decoder__add_bin_file(struct cs_etm_decoder *decoder, uint64_t offset, uint64_t address, uint64_t len, const char *fname) +{ + int err = 0; + file_mem_region_t region; + + (void) len; + if (NULL == decoder) + return -1; + + if (NULL == decoder->dcd_tree) + return -1; + + region.file_offset = offset; + 
region.start_address = address; + region.region_size = len; + err = rctdl_dt_add_binfile_region_mem_acc(decoder->dcd_tree, + &region, + 1, + RCTDL_MEM_SPACE_ANY, + fname); + + return err; +} + +const struct cs_etm_state *cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + uint64_t indx, + const uint8_t *buf, + size_t len, + size_t *consumed) +{ + int ret = 0; + rctdl_datapath_resp_t dp_ret = decoder->prev_return; + size_t processed = 0; + + if (decoder->packet_count > 0) { + decoder->state.err = ret; + *consumed = processed; + return &(decoder->state); + } + + while ((processed < len) && (0 == ret)) { + + if (RCTDL_DATA_RESP_IS_CONT(dp_ret)) { + uint32_t count; + dp_ret = rctdl_dt_process_data(decoder->dcd_tree, + RCTDL_OP_DATA, + indx+processed, + len - processed, + &buf[processed], + &count); + processed += count; + + } else if (RCTDL_DATA_RESP_IS_WAIT(dp_ret)) { + dp_ret = rctdl_dt_process_data(decoder->dcd_tree, + RCTDL_OP_FLUSH, + 0, + 0, + NULL, + NULL); + break; + } else { + ret = -1; + } + } + if (RCTDL_DATA_RESP_IS_WAIT(dp_ret)) { + if (RCTDL_DATA_RESP_IS_CONT(decoder->prev_return)) { + decoder->prev_processed = processed; + } + processed = 0; + } else if (RCTDL_DATA_RESP_IS_WAIT(decoder->prev_return)) { + processed = decoder->prev_processed; + decoder->prev_processed = 0; + } + *consumed = processed; + decoder->prev_return = dp_ret; + decoder->state.err = ret; + return &(decoder->state); +} + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet) +{ + if (decoder->packet_count == 0) return -1; + + if (packet == NULL) return -1; + + *packet = decoder->packet_buffer[decoder->head]; + + decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); + + decoder->packet_count--; + + return 0; +} + +static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) +{ + unsigned i; + + decoder->head = 0; + decoder->tail = 0; + decoder->end_tail = 0; + decoder->packet_count = 0; + for (i = 0; i < MAX_BUFFER;
i++) { + decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + } +} + +struct cs_etm_decoder *cs_etm_decoder__new(uint32_t num_cpu, struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params t_params[]) +{ + struct cs_etm_decoder *decoder; + rctdl_dcd_tree_src_t format; + uint32_t flags; + int ret; + size_t i; + + if ((t_params == NULL) || (d_params == 0)) { + return NULL; + } + + decoder = zalloc(sizeof(struct cs_etm_decoder)); + + if (decoder == NULL) { + return NULL; + } + + decoder->state.data = d_params->data; + decoder->prev_return = RCTDL_RESP_CONT; + cs_etm_decoder__clear_buffer(decoder); + format = (d_params->formatted ? RCTDL_TRC_SRC_FRAME_FORMATTED : + RCTDL_TRC_SRC_SINGLE); + flags = 0; + flags |= (d_params->fsyncs ? RCTDL_DFRMTR_HAS_FSYNCS : 0); + flags |= (d_params->hsyncs ? RCTDL_DFRMTR_HAS_HSYNCS : 0); + flags |= (d_params->frame_aligned ? 
RCTDL_DFRMTR_FRAME_MEM_ALIGN : 0); + + /* Create decode tree for the data source */ + decoder->dcd_tree = rctdl_create_dcd_tree(format,flags); + + if (decoder->dcd_tree == 0) { + goto err_free_decoder; + } + + for (i = 0; i < num_cpu; ++i) { + switch (t_params[i].protocol) + { + case CS_ETM_PROTO_ETMV4i: + if (d_params->operation == CS_ETM_OPERATION_PRINT) { + ret = cs_etm_decoder__create_etmv4i_packet_printer(d_params,&t_params[i],decoder); + } else if (d_params->operation == CS_ETM_OPERATION_DECODE) { + ret = cs_etm_decoder__create_etmv4i_packet_decoder(d_params,&t_params[i],decoder); + } else { + ret = -CS_ETM_ERR_PARAM; + } + if (ret != 0) { + goto err_free_decoder_tree; + } + break; + default: + goto err_free_decoder_tree; + break; + } + } + + + return decoder; + +err_free_decoder_tree: + rctdl_destroy_dcd_tree(decoder->dcd_tree); +err_free_decoder: + free(decoder); + return NULL; +} + + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder) +{ + if (decoder == NULL) return; + + rctdl_destroy_dcd_tree(decoder->dcd_tree); + decoder->dcd_tree = NULL; + + free(decoder); +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h new file mode 100644 index 000000000000..2d1013bcdbab --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -0,0 +1,117 @@ +/* + * Copyright(C) 2015 Linaro Limited. All rights reserved. + * Author: Tor Jeremiassen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program.
If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef INCLUDE__CS_ETM_DECODER_H__ +#define INCLUDE__CS_ETM_DECODER_H__ + +#include +#include + +struct cs_etm_decoder; + +struct cs_etm_buffer { + const unsigned char *buf; + size_t len; + uint64_t offset; + //bool consecutive; + uint64_t ref_timestamp; + //uint64_t trace_nr; +}; + +enum cs_etm_sample_type { + CS_ETM_RANGE = 1 << 0, +}; + +struct cs_etm_state { + int err; + void *data; + unsigned isa; + uint64_t start; + uint64_t end; + uint64_t timestamp; +}; + +struct cs_etm_packet { + enum cs_etm_sample_type sample_type; + uint64_t start_addr; + uint64_t end_addr; + bool exc; + bool exc_ret; +}; + + +struct cs_etm_queue; +typedef uint32_t (*cs_etm_mem_cb_type)(struct cs_etm_queue *, uint64_t, size_t, uint8_t *); + +struct cs_etm_trace_params { + void *etmv4i_packet_handler; + uint32_t reg_idr0; + uint32_t reg_idr1; + uint32_t reg_idr2; + uint32_t reg_idr8; + uint32_t reg_configr; + uint32_t reg_traceidr; + int protocol; +}; + +struct cs_etm_decoder_params { + int operation; + void (*packet_printer)(const char *); + cs_etm_mem_cb_type mem_acc_cb; + bool formatted; + bool fsyncs; + bool hsyncs; + bool frame_aligned; + void *data; +}; + +enum { + CS_ETM_PROTO_ETMV3 = 1, + CS_ETM_PROTO_ETMV4i, + CS_ETM_PROTO_ETMV4d, +}; + +enum { + CS_ETM_OPERATION_PRINT = 1, + CS_ETM_OPERATION_DECODE, +}; + +enum { + CS_ETM_ERR_NOMEM = 1, + CS_ETM_ERR_NODATA, + CS_ETM_ERR_PARAM, +}; + + +struct cs_etm_decoder *cs_etm_decoder__new(uint32_t num_cpu, struct cs_etm_decoder_params *,struct cs_etm_trace_params []); + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *, uint64_t, uint64_t, cs_etm_mem_cb_type); + +int cs_etm_decoder__flush(struct cs_etm_decoder *); +void cs_etm_decoder__free(struct cs_etm_decoder *); +int cs_etm_decoder__get_packet(struct cs_etm_decoder *, struct cs_etm_packet *); + +int cs_etm_decoder__add_bin_file(struct cs_etm_decoder *, uint64_t, uint64_t, uint64_t, const char *); + +const struct cs_etm_state
*cs_etm_decoder__process_data_block(struct cs_etm_decoder *, + uint64_t, + const uint8_t *, + size_t, + size_t *); + +#endif /* INCLUDE__CS_ETM_DECODER_H__ */ + diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c new file mode 100644 index 000000000000..0e9a0d07f03a --- /dev/null +++ b/tools/perf/util/cs-etm.c @@ -0,0 +1,1472 @@ +/* + * Copyright(C) 2016 Linaro Limited. All rights reserved. + * Author: Tor Jeremiassen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include +#include +#include + +#include "perf.h" +#include "thread_map.h" +#include "thread.h" +#include "thread-stack.h" +#include "callchain.h" +#include "auxtrace.h" +#include "evlist.h" +#include "machine.h" +#include "util.h" +#include "color.h" +#include "cs-etm.h" +#include "cs-etm-decoder/cs-etm-decoder.h" +#include "debug.h" + +#include + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) +#define MAX_TIMESTAMP (~0ULL) + +struct cs_etm_auxtrace { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u64 **metadata; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + struct perf_evsel *switch_evsel; + struct thread *unknown_thread; + uint32_t num_cpu; + bool timeless_decoding; + bool sampling_mode; + bool snapshot_mode; + bool data_queued; + bool sync_switch; + bool synth_needs_swap; + int have_sched_switch; + + bool sample_instructions; + u64 instructions_sample_type; + u64
instructions_sample_period; + u64 instructions_id; + struct itrace_synth_opts synth_opts; + unsigned pmu_type; +}; + +struct cs_etm_queue { + struct cs_etm_auxtrace *etm; + unsigned queue_nr; + struct auxtrace_buffer *buffer; + const struct cs_etm_state *state; + struct ip_callchain *chain; + union perf_event *event_buf; + bool on_heap; + bool step_through_buffers; + bool use_buffer_pid_tid; + pid_t pid, tid; + int cpu; + struct thread *thread; + u64 time; + u64 timestamp; + bool stop; + bool have_sample; + struct cs_etm_decoder *decoder; + u64 offset; + bool eot; + bool kernel_mapped; +}; + +static int cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq); +static int cs_etm__update_queues(struct cs_etm_auxtrace *); +static int cs_etm__process_queues(struct cs_etm_auxtrace *, u64); +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *, pid_t, u64); +static uint32_t cs_etm__mem_access(struct cs_etm_queue *, uint64_t , size_t , uint8_t *); + +static void cs_etm__packet_dump(const char *pkt_string) +{ + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout,color, " %s\n", pkt_string); + fflush(stdout); +} + +static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, + struct auxtrace_buffer *buffer) +{ + const char *color = PERF_COLOR_BLUE; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_decoder *decoder; + size_t buffer_used = 0; + size_t i; + + fprintf(stdout,"\n"); + color_fprintf(stdout, color, + ". ... 
CoreSight ETM Trace data: size %zu bytes\n", + buffer->size); + + t_params = zalloc(sizeof(struct cs_etm_trace_params) * etm->num_cpu); + for (i = 0; i < etm->num_cpu; ++i) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].reg_configr = etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].reg_traceidr = etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + //[CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %"PRIx64"\n", + } + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_PRINT; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + + decoder = cs_etm_decoder__new(etm->num_cpu,&d_params, t_params); + + zfree(&t_params); + + if (decoder == NULL) { + return; + } + do { + size_t consumed; + cs_etm_decoder__process_data_block(decoder,buffer->offset,&(((uint8_t *)buffer->data)[buffer_used]),buffer->size - buffer_used, &consumed); + buffer_used += consumed; + } while(buffer_used < buffer->size); + cs_etm_decoder__free(decoder); +} + +static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool){ + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + int ret; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + ret = cs_etm__update_queues(etm); + + if (ret < 0) + return ret; + + if (etm->timeless_decoding) + return cs_etm__process_timeless_queues(etm,-1,MAX_TIMESTAMP - 1); + + return cs_etm__process_queues(etm, MAX_TIMESTAMP); +} + +static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue) +{ + struct cs_etm_queue *etmq = queue->priv; + + if ((queue->tid == -1) || (etm->have_sched_switch)) { + 
etmq->tid = machine__get_current_tid(etm->machine, etmq->cpu); + thread__zput(etmq->thread); + } + + if ((!etmq->thread) && (etmq->tid != -1)) { + etmq->thread = machine__find_thread(etm->machine,-1,etmq->tid); + } + + if (etmq->thread) { + etmq->pid = etmq->thread->pid_; + if (queue->cpu == -1) { + etmq->cpu = etmq->thread->cpu; + } + } +} + +static void cs_etm__free_queue(void *priv) +{ + struct cs_etm_queue *etmq = priv; + + if (!etmq) + return; + + thread__zput(etmq->thread); + cs_etm_decoder__free(etmq->decoder); + zfree(&etmq->event_buf); + zfree(&etmq->chain); + free(etmq); +} + +static void cs_etm__free_events(struct perf_session *session) +{ + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + struct auxtrace_queues *queues = &(aux->queues); + + unsigned i; + + for (i = 0; i < queues->nr_queues; ++i) { + cs_etm__free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = 0; + } + + auxtrace_queues__free(queues); + +} + +static void cs_etm__free(struct perf_session *session) +{ + + size_t i; + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + auxtrace_heap__free(&aux->heap); + cs_etm__free_events(session); + session->auxtrace = NULL; + //thread__delete(aux->unknown_thread); + for (i = 0; i < aux->num_cpu; ++i) { + zfree(&aux->metadata[i]); + } + zfree(&aux->metadata); + free(aux); +} + +static void cs_etm__use_buffer_pid_tid(struct cs_etm_queue *etmq, + struct auxtrace_queue *queue, + struct auxtrace_buffer *buffer) +{ + if ((queue->cpu == -1) && (buffer->cpu != -1)) { + etmq->cpu = buffer->cpu; + } + + etmq->pid = buffer->pid; + etmq->tid = buffer->tid; + + thread__zput(etmq->thread); + + if (etmq->tid != -1) { + if (etmq->pid != -1) { + etmq->thread = machine__findnew_thread(etmq->etm->machine, + etmq->pid, + etmq->tid); + } else { + etmq->thread = machine__findnew_thread(etmq->etm->machine, + -1, + etmq->tid); + } + } +} + + +static int 
cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +{ + struct auxtrace_buffer *aux_buffer = etmq->buffer; + struct auxtrace_buffer *old_buffer = aux_buffer; + struct auxtrace_queue *queue; + + if (etmq->stop) { + buff->len = 0; + return 0; + } + + queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; + + aux_buffer = auxtrace_buffer__next(queue,aux_buffer); + + if (!aux_buffer) { + if (old_buffer) { + auxtrace_buffer__drop_data(old_buffer); + } + buff->len = 0; + return 0; + } + + etmq->buffer = aux_buffer; + + if (!aux_buffer->data) { + int fd = perf_data_file__fd(etmq->etm->session->file); + + aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); + if (!aux_buffer->data) + return -ENOMEM; + } + + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + if (aux_buffer->use_data) { + buff->offset = aux_buffer->offset; + buff->len = aux_buffer->use_size; + buff->buf = aux_buffer->use_data; + } else { + buff->offset = aux_buffer->offset; + buff->len = aux_buffer->size; + buff->buf = aux_buffer->data; + } + /* + buff->offset = 0; + buff->len = sizeof(cstrace); + buff->buf = cstrace; + */ + etmq->stop = true; + + buff->ref_timestamp = aux_buffer->reference; + + if (etmq->use_buffer_pid_tid && + ((etmq->pid != aux_buffer->pid) || + (etmq->tid != aux_buffer->tid))) { + cs_etm__use_buffer_pid_tid(etmq,queue,aux_buffer); + } + + if (etmq->step_through_buffers) + etmq->stop = true; + + if (buff->len == 0) + return cs_etm__get_trace(buff,etmq); + + return 0; +} + +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + unsigned int queue_nr) +{ + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_queue *etmq; + size_t i; + + etmq = zalloc(sizeof(struct cs_etm_queue)); + if (!etmq) + return NULL; + + if (etm->synth_opts.callchain) { + size_t sz = sizeof(struct ip_callchain); + + sz += etm->synth_opts.callchain_sz * sizeof(u64); + etmq->chain = zalloc(sz); + if 
(!etmq->chain) + goto out_free; + } else { + etmq->chain = NULL; + } + + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!etmq->event_buf) + goto out_free; + + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->pid = -1; + etmq->tid = -1; + etmq->cpu = -1; + etmq->stop = false; + etmq->kernel_mapped = false; + + t_params = zalloc(sizeof(struct cs_etm_trace_params)*etm->num_cpu); + + for (i = 0; i < etm->num_cpu; ++i) { + t_params[i].reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].reg_configr = etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].reg_traceidr = etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + } + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_DECODE; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + d_params.data = etmq; + + etmq->decoder = cs_etm_decoder__new(etm->num_cpu,&d_params,t_params); + + + zfree(&t_params); + + if (!etmq->decoder) + goto out_free; + + etmq->offset = 0; + etmq->eot = false; + + return etmq; + +out_free: + zfree(&etmq->event_buf); + zfree(&etmq->chain); + free(etmq); + return NULL; +} + +static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct cs_etm_queue *etmq = queue->priv; + + if (list_empty(&(queue->head))) + return 0; + + if (etmq == NULL) { + etmq = cs_etm__alloc_queue(etm,queue_nr); + + if (etmq == NULL) { + return -ENOMEM; + } + + queue->priv = etmq; + + if (queue->cpu != -1) { + etmq->cpu = queue->cpu; + } + + etmq->tid = queue->tid; + + if (etm->sampling_mode) { + if (etm->timeless_decoding) + etmq->step_through_buffers = true; + if (etm->timeless_decoding || !etm->have_sched_switch) + 
etmq->use_buffer_pid_tid = true; + } + } + + if (!etmq->on_heap && + (!etm->sync_switch)) { + const struct cs_etm_state *state; + int ret = 0; + + if (etm->timeless_decoding) + return ret; + + //cs_etm__log("queue %u getting timestamp\n",queue_nr); + //cs_etm__log("queue %u decoding cpu %d pid %d tid %d\n", + //queue_nr, etmq->cpu, etmq->pid, etmq->tid); + (void) state; + return ret; + /* + while (1) { + state = cs_etm_decoder__decode(etmq->decoder); + if (state->err) { + if (state->err == CS_ETM_ERR_NODATA) { + //cs_etm__log("queue %u has no timestamp\n", + //queue_nr); + return 0; + } + continue; + } + if (state->timestamp) + break; + } + + etmq->timestamp = state->timestamp; + //cs_etm__log("queue %u timestamp 0x%"PRIx64 "\n", + //queue_nr, etmq->timestamp); + etmq->state = state; + etmq->have_sample = true; + //cs_etm__sample_flags(etmq); + ret = auxtrace_heap__add(&etm->heap, queue_nr, etmq->timestamp); + if (ret) + return ret; + etmq->on_heap = true; + */ + } + + return 0; +} + + +static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) +{ + unsigned int i; + int ret; + + for (i = 0; i < etm->queues.nr_queues; i++) { + ret = cs_etm__setup_queue(etm, &(etm->queues.queue_array[i]),i); + if (ret) + return ret; + } + return 0; +} + +#if 0 +struct cs_etm_cache_entry { + struct auxtrace_cache_entry entry; + uint64_t icount; + uint64_t bcount; +}; + +static size_t cs_etm__cache_divisor(void) +{ + static size_t d = 64; + + return d; +} + +static size_t cs_etm__cache_size(struct dso *dso, + struct machine *machine) +{ + off_t size; + + size = dso__data_size(dso,machine); + size /= cs_etm__cache_divisor(); + + if (size < 1000) + return 10; + + if (size > (1 << 21)) + return 21; + + return 32 - __builtin_clz(size); +} + +static struct auxtrace_cache *cs_etm__cache(struct dso *dso, + struct machine *machine) +{ + struct auxtrace_cache *c; + size_t bits; + + if (dso->auxtrace_cache) + return dso->auxtrace_cache; + + bits = cs_etm__cache_size(dso,machine); + + c = 
auxtrace_cache__new(bits, sizeof(struct cs_etm_cache_entry), 200); + + dso->auxtrace_cache = c; + + return c; +} + +static int cs_etm__cache_add(struct dso *dso, struct machine *machine, + uint64_t offset, uint64_t icount, uint64_t bcount) +{ + struct auxtrace_cache *c = cs_etm__cache(dso, machine); + struct cs_etm_cache_entry *e; + int err; + + if (!c) + return -ENOMEM; + + e = auxtrace_cache__alloc_entry(c); + if (!e) + return -ENOMEM; + + e->icount = icount; + e->bcount = bcount; + + err = auxtrace_cache__add(c, offset, &e->entry); + + if (err) + auxtrace_cache__free_entry(c, e); + + return err; +} + +static struct cs_etm_cache_entry *cs_etm__cache_lookup(struct dso *dso, + struct machine *machine, + uint64_t offset) +{ + struct auxtrace_cache *c = cs_etm__cache(dso, machine); + + if (!c) + return NULL; + + return auxtrace_cache__lookup(dso->auxtrace_cache, offset); +} +#endif + +static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, + struct cs_etm_packet *packet) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + union perf_event *event = etmq->event_buf; + struct perf_sample sample = {.ip = 0,}; + uint64_t start_addr = packet->start_addr; + uint64_t end_addr = packet->end_addr; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + + sample.ip = start_addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.addr = end_addr; + sample.id = etmq->etm->instructions_id; + sample.stream_id = etmq->etm->instructions_id; + sample.period = (end_addr - start_addr) >> 2; + sample.cpu = etmq->cpu; + sample.flags = 0; // etmq->flags; + sample.insn_len = 1; // etmq->insn_len; + + //etmq->last_insn_cnt = etmq->state->tot_insn_cnt; + +#if 0 + { + struct addr_location al; + uint64_t offset; + struct thread *thread; + struct machine *machine = etmq->etm->machine; + uint8_t cpumode; + struct cs_etm_cache_entry *e; + uint8_t 
buf[256]; + size_t bufsz; + + thread = etmq->thread; + + if (!thread) { + thread = etmq->etm->unknown_thread; + } + + if (start_addr > 0xffffffc000000000UL) { + cpumode = PERF_RECORD_MISC_KERNEL; + } else { + cpumode = PERF_RECORD_MISC_USER; + } + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, start_addr,&al); + if (!al.map || !al.map->dso) { + goto endTest; + } + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso,DSO_DATA_STATUS_SEEN_ITRACE)) { + goto endTest; + } + + offset = al.map->map_ip(al.map,start_addr); + + + e = cs_etm__cache_lookup(al.map->dso, machine, offset); + + if (e) { + (void) e; + } else { + int len; + map__load(al.map, machine->symbol_filter); + + bufsz = sizeof(buf); + len = dso__data_read_offset(al.map->dso, machine, + offset, buf, bufsz); + + if (len <= 0) { + goto endTest; + } + + cs_etm__cache_add(al.map->dso, machine, offset, (end_addr - start_addr) >> 2, end_addr - start_addr); + + } +endTest: + (void) offset; + } +#endif + + ret = perf_session__deliver_synth_event(etm->session,event, &sample); + + if (ret) { + pr_err("CS ETM Trace: failed to deliver instruction event, error %d\n", ret); + + } + return ret; +} + +struct cs_etm_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + + +static int cs_etm__event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct cs_etm_synth *cs_etm_synth = + container_of(tool, struct cs_etm_synth, dummy_tool); + + (void) sample; + (void) machine; + + return perf_session__deliver_synth_event(cs_etm_synth->session, event, NULL); + +} + + +static int cs_etm__synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct cs_etm_synth cs_etm_synth; + + memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); + cs_etm_synth.session = session; + + return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, + &id, 
cs_etm__event_synth); +} + +static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each(evlist, evsel) { + + if (evsel->attr.type == etm->pmu_type) { + found = true; + break; + } + } + + if (!found) { + pr_debug("There are no selected events with Core Sight Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (etm->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + id = evsel->id[0] + 1000000000; + + if (!id) + id = 1; + + if (etm->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = etm->synth_opts.period; + etm->instructions_sample_period = attr.sample_period; + err = cs_etm__synth_event(session, &attr, id); + + if (err) { + pr_err("%s: failed to synthesize 'instructions' event type\n", + __func__); + return err; + } + etm->sample_instructions = true; + etm->instructions_sample_type = attr.sample_type; + etm->instructions_id = id; + id += 1; + } + + etm->synth_needs_swap = evsel->needs_swap; + return 0; +} + +static int cs_etm__sample(struct cs_etm_queue *etmq) +{ + //const struct cs_etm_state *state = etmq->state; + struct cs_etm_packet packet; + //struct 
cs_etm_auxtrace *etm = etmq->etm; + int err; + + if (!etmq->have_sample) + return 0; + + etmq->have_sample = false; + + err = cs_etm_decoder__get_packet(etmq->decoder,&packet); + // if there is no sample, it returns err = -1, no real error + + if (!err && packet.sample_type & CS_ETM_RANGE) { + err = cs_etm__synth_instruction_sample(etmq,&packet); + if (err) + return err; + } + return 0; +} + +static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp) +{ + struct cs_etm_buffer buffer = {.buf = 0,}; + size_t buffer_used = 0; + int err = 0; + + err = cs_etm__get_trace(&buffer,etmq); + if (err) + return err; + + do { + size_t processed = 0; + etmq->state = cs_etm_decoder__process_data_block(etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len-buffer_used, + &processed); + err = etmq->state->err; + etmq->offset += processed; + buffer_used += processed; + if (!err) { + etmq->have_sample = true; + cs_etm__sample(etmq); + } + } while (!etmq->eot && (buffer.len > buffer_used)); + + (void) timestamp; + + return err; +} + +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) +{ + if (etm->queues.new_data) { + etm->queues.new_data = false; + return cs_etm__setup_queues(etm); + } + return 0; +} + +static int cs_etm__process_queues(struct cs_etm_auxtrace *etm, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct cs_etm_queue *etmq; + + if (!etm->heap.heap_cnt) + return 0; + + if (etm->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = etm->heap.heap_array[0].queue_nr; + queue = &etm->queues.queue_array[queue_nr]; + etmq = queue->priv; + + //cs_etm__log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", + //queue_nr, etm->heap.heap_array[0].ordinal, + //timestamp); + + auxtrace_heap__pop(&etm->heap); + + if (etm->heap.heap_cnt) { + ts = etm->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + 
cs_etm__set_pid_tid_cpu(etm, queue); + + ret = cs_etm__run_decoder(etmq, &ts); + + if (ret < 0) { + auxtrace_heap__add(&etm->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&etm->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + etmq->on_heap = false; + } + } + return 0; +} + +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, + pid_t tid, + u64 time_) +{ + struct auxtrace_queues *queues = &etm->queues; + unsigned int i; + u64 ts = 0; + + for (i = 0; i < queues->nr_queues; ++i) { + struct auxtrace_queue *queue = &(etm->queues.queue_array[i]); + struct cs_etm_queue *etmq = queue->priv; + + if (etmq && ((tid == -1) || (etmq->tid == tid))) { + etmq->time = time_; + cs_etm__set_pid_tid_cpu(etm, queue); + cs_etm__run_decoder(etmq,&ts); + + } + } + return 0; +} + +static struct cs_etm_queue *cs_etm__cpu_to_etmq(struct cs_etm_auxtrace *etm, + int cpu) +{ + unsigned q,j; + + if (/*(cpu < 0) ||*/ (0 == etm->queues.nr_queues)) + return NULL; + + if ((unsigned) cpu >= etm->queues.nr_queues) + q = etm->queues.nr_queues - 1; + else + q = cpu; + + if (etm->queues.queue_array[q].cpu == cpu) + return etm->queues.queue_array[q].priv; + + for (j = 0; q > 0; j++) { + if (etm->queues.queue_array[--q].cpu == cpu) + return etm->queues.queue_array[q].priv; + } + + for (; j < etm->queues.nr_queues; j++) { + if (etm->queues.queue_array[j].cpu == cpu) + return etm->queues.queue_array[j].priv; + + } + + return NULL; +} + +static uint32_t cs_etm__mem_access(struct cs_etm_queue *etmq, uint64_t address, size_t size, uint8_t *buffer) +{ + struct addr_location al; + uint64_t offset; + struct thread *thread; + struct machine *machine; + uint8_t cpumode; + int len; + + if (etmq == NULL) + return -1; + + machine = etmq->etm->machine; + thread = etmq->thread; + if (address > 0xffffffc000000000UL) { + cpumode = PERF_RECORD_MISC_KERNEL; + } else { + cpumode = PERF_RECORD_MISC_USER; + } + + thread__find_addr_map(thread, cpumode, 
MAP__FUNCTION, address,&al); + + if (!al.map || !al.map->dso) { + return 0; + } + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso,DSO_DATA_STATUS_SEEN_ITRACE)) { + return 0; + } + + offset = al.map->map_ip(al.map,address); + + map__load(al.map, machine->symbol_filter); + + len = dso__data_read_offset(al.map->dso, machine, + offset, buffer, size); + + if (len <= 0) { + return 0; + } + + return len; +} + +static bool check_need_swap(int file_endian) +{ + const int data = 1; + u8 *check = (u8 *)&data; + int host_endian; + + if (check[0] == 1) + host_endian = ELFDATA2LSB; + else + host_endian = ELFDATA2MSB; + + return host_endian != file_endian; +} + +static int cs_etm__read_elf_info(const char *fname, uint64_t *foffset, uint64_t *fstart, uint64_t *fsize) +{ + FILE *fp; + u8 e_ident[EI_NIDENT]; + int ret = -1; + bool need_swap = false; + size_t buf_size; + void *buf; + int i; + + fp = fopen(fname, "r"); + if (fp == NULL) + return -1; + + if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + goto out; + + if (memcmp(e_ident, ELFMAG, SELFMAG) || + e_ident[EI_VERSION] != EV_CURRENT) + goto out; + + need_swap = check_need_swap(e_ident[EI_DATA]); + + /* for simplicity */ + fseek(fp, 0, SEEK_SET); + + if (e_ident[EI_CLASS] == ELFCLASS32) { + Elf32_Ehdr ehdr; + Elf32_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_32(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_32(phdr->p_offset); + phdr->p_filesz = bswap_32(phdr->p_filesz); + } + + if (phdr->p_type != PT_LOAD) 
+ continue; + + *foffset = phdr->p_offset; + *fstart = phdr->p_vaddr; + *fsize = phdr->p_filesz; + ret = 0; + break; + } + } else { + Elf64_Ehdr ehdr; + Elf64_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_64(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_64(phdr->p_offset); + phdr->p_filesz = bswap_64(phdr->p_filesz); + } + + if (phdr->p_type != PT_LOAD) + continue; + + *foffset = phdr->p_offset; + *fstart = phdr->p_vaddr; + *fsize = phdr->p_filesz; + ret = 0; + break; + } + } +out_free: + free(buf); +out: + fclose(fp); + return ret; +} + +static int cs_etm__process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + u64 timestamp; + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("CoreSight ETM Trace requires ordered events\n"); + return -EINVAL; + } + + if (sample->time && (sample->time != (u64)-1)) + timestamp = sample->time; + else + timestamp = 0; + + if (timestamp || etm->timeless_decoding) { + err = cs_etm__update_queues(etm); + if (err) + return err; + + } + + if (event->header.type == PERF_RECORD_MMAP2) { + struct dso *dso; + int cpu; + struct cs_etm_queue *etmq; + + cpu = sample->cpu; + + etmq = cs_etm__cpu_to_etmq(etm,cpu); + + if (!etmq) { + return -1; + } + + dso = dsos__find(&(etm->machine->dsos),event->mmap2.filename,false); + if (NULL != dso) { + err = 
cs_etm_decoder__add_mem_access_cb( + etmq->decoder, + event->mmap2.start, + event->mmap2.len, + cs_etm__mem_access); + } + + if ((symbol_conf.vmlinux_name != NULL) && (!etmq->kernel_mapped)) { + uint64_t foffset; + uint64_t fstart; + uint64_t fsize; + + err = cs_etm__read_elf_info(symbol_conf.vmlinux_name, + &foffset,&fstart,&fsize); + + if (!err) { + cs_etm_decoder__add_bin_file( + etmq->decoder, + foffset, + fstart, + fsize & ~0x1ULL, + symbol_conf.vmlinux_name); + + etmq->kernel_mapped = true; + } + } + + } + + if (etm->timeless_decoding) { + if (event->header.type == PERF_RECORD_EXIT) { + err = cs_etm__process_timeless_queues(etm, + event->fork.tid, + sample->time); + } + } else if (timestamp) { + err = cs_etm__process_queues(etm, timestamp); + } + + //cs_etm__log("event %s (%u): cpu %d time%"PRIu64" tsc %#"PRIx64"\n", + //perf_event__name(event->header.type), event->header.type, + //sample->cpu, sample->time, timestamp); + return err; +} + +static int cs_etm__process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool) +{ + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + (void) tool; + + if (!etm->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data_file__fd(session->file); + bool is_pipe = perf_data_file__is_pipe(session->file); + int err; + + if (is_pipe) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) { + return -errno; + } + } + + err = auxtrace_queues__add_event(&etm->queues, + session, + event, + data_offset, + &buffer); + if (err) + return err; + + if (dump_trace) + { + if (auxtrace_buffer__get_data(buffer,fd)) { + cs_etm__dump_event(etm,buffer); + auxtrace_buffer__put_data(buffer); + } + } + } + + return 0; + +} + +static const char * const cs_etm_global_header_fmts[] = { + [CS_HEADER_VERSION_0] = " Header version %"PRIx64"\n", + [CS_PMU_TYPE_CPUS] = " PMU type/num 
cpus %"PRIx64"\n", + [CS_ETM_SNAPSHOT] = " Snapshot %"PRIx64"\n", +}; + +static const char * const cs_etm_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %"PRIx64"\n", + [CS_ETM_CPU] = " CPU %"PRIx64"\n", + [CS_ETM_ETMCR] = " ETMCR %"PRIx64"\n", + [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %"PRIx64"\n", + [CS_ETM_ETMCCER] = " ETMCCER %"PRIx64"\n", + [CS_ETM_ETMIDR] = " ETMIDR %"PRIx64"\n", +}; + +static const char * const cs_etmv4_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %"PRIx64"\n", + [CS_ETM_CPU] = " CPU %"PRIx64"\n", + [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %"PRIx64"\n", + [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %"PRIx64"\n", + [CS_ETMV4_TRCIDR0] = " TRCIDR0 %"PRIx64"\n", + [CS_ETMV4_TRCIDR1] = " TRCIDR1 %"PRIx64"\n", + [CS_ETMV4_TRCIDR2] = " TRCIDR2 %"PRIx64"\n", + [CS_ETMV4_TRCIDR8] = " TRCIDR8 %"PRIx64"\n", + [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %"PRIx64"\n", +}; + +static void cs_etm__print_auxtrace_info(u64 *val, size_t num) +{ + unsigned i,j,cpu; + + for (i = 0, cpu = 0; cpu < num; ++cpu) { + + if (val[i] == __perf_cs_etmv3_magic) { + for (j = 0; j < CS_ETM_PRIV_MAX; ++j, ++i) { + fprintf(stdout,cs_etm_priv_fmts[j],val[i]); + } + } else if (val[i] == __perf_cs_etmv4_magic) { + for (j = 0; j < CS_ETMV4_PRIV_MAX; ++j, ++i) { + fprintf(stdout,cs_etmv4_priv_fmts[j],val[i]); + } + } else { + // failure.. 
return + return; + } + } +} + +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &(event->auxtrace_info); + size_t event_header_size = sizeof(struct perf_event_header); + size_t info_header_size = 8; + size_t total_size = auxtrace_info->header.size; + size_t priv_size = 0; + size_t num_cpu; + struct cs_etm_auxtrace *etm = 0; + int err = 0; + u64 *ptr; + u64 *hdr = NULL; + u64 **metadata = NULL; + size_t i,j,k; + unsigned pmu_type; + + if (total_size < (event_header_size + info_header_size)) + return -EINVAL; + + priv_size = total_size - event_header_size - info_header_size; + + // First the global part + + ptr = (u64 *) auxtrace_info->priv; + if (ptr[0] == 0) { + hdr = zalloc(sizeof(u64 *) * CS_HEADER_VERSION_0_MAX); + if (hdr == NULL) { + return -EINVAL; + } + for (i = 0; i < CS_HEADER_VERSION_0_MAX; ++i) { + hdr[i] = ptr[i]; + } + num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; + pmu_type = (unsigned) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); + } else { + return -EINVAL; + } + + metadata = zalloc(sizeof(u64 *) * num_cpu); + + if (metadata == NULL) { + return -EINVAL; + } + + for (j = 0; j < num_cpu; ++j) { + if (ptr[i] == __perf_cs_etmv3_magic) { + metadata[j] = zalloc(sizeof(u64)*CS_ETM_PRIV_MAX); + if (metadata == NULL) + return -EINVAL; + for (k = 0; k < CS_ETM_PRIV_MAX; k++) { + metadata[j][k] = ptr[i+k]; + } + i += CS_ETM_PRIV_MAX; + } else if (ptr[i] == __perf_cs_etmv4_magic) { + metadata[j] = zalloc(sizeof(u64)*CS_ETMV4_PRIV_MAX); + if (metadata == NULL) + return -EINVAL; + for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) { + metadata[j][k] = ptr[i+k]; + } + i += CS_ETMV4_PRIV_MAX; + } + } + + if (i*8 != priv_size) + return -EINVAL; + + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv,num_cpu); + + etm = zalloc(sizeof(struct cs_etm_auxtrace)); + + etm->num_cpu = num_cpu; + etm->pmu_type = pmu_type; + etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); + + if 
(!etm) + return -ENOMEM; + + + err = auxtrace_queues__init(&etm->queues); + if (err) + goto err_free; + + etm->unknown_thread = thread__new(999999999,999999999); + if (etm->unknown_thread == NULL) { + err = -ENOMEM; + goto err_free_queues; + } + err = thread__set_comm(etm->unknown_thread, "unknown", 0); + if (err) { + goto err_delete_thread; + } + + if (thread__init_map_groups(etm->unknown_thread, + etm->machine)) { + err = -ENOMEM; + goto err_delete_thread; + } + + etm->timeless_decoding = true; + etm->sampling_mode = false; + etm->metadata = metadata; + etm->session = session; + etm->machine = &session->machines.host; + etm->auxtrace_type = auxtrace_info->type; + + etm->auxtrace.process_event = cs_etm__process_event; + etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; + etm->auxtrace.flush_events = cs_etm__flush_events; + etm->auxtrace.free_events = cs_etm__free_events; + etm->auxtrace.free = cs_etm__free; + session->auxtrace = &(etm->auxtrace); + + if (dump_trace) + return 0; + + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts); + } + etm->synth_opts.branches = false; + etm->synth_opts.callchain = false; + etm->synth_opts.calls = false; + etm->synth_opts.returns = false; + + err = cs_etm__synth_events(etm, session); + if (err) + goto err_delete_thread; + + err = auxtrace_queues__process_index(&etm->queues, session); + if (err) + goto err_delete_thread; + + etm->data_queued = etm->queues.populated; + + return 0; + +err_delete_thread: + thread__delete(etm->unknown_thread); +err_free_queues: + auxtrace_queues__free(&etm->queues); + session->auxtrace = NULL; +err_free: + free(etm); + return err; +} diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 3cc6bc3263fe..7b683a024690 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -71,4 +71,7 @@ static const u64 
__perf_cs_etmv4_magic = 0x4040404040404040ULL; #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session); + #endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8b303ff20289..888640ffada5 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,3 +1,4 @@ +#include "build-id.h" #include "callchain.h" #include "debug.h" #include "event.h" @@ -685,8 +686,16 @@ static struct dso *machine__get_kernel(struct machine *machine) DSO_TYPE_GUEST_KERNEL); } - if (kernel != NULL && (!kernel->has_build_id)) - dso__read_running_kernel_build_id(kernel, machine); + if (kernel != NULL && (!kernel->has_build_id)) { + if (symbol_conf.vmlinux_name != NULL) { + filename__read_build_id(symbol_conf.vmlinux_name, + kernel->build_id, + sizeof(kernel->build_id)); + kernel->has_build_id = 1; + } else { + dso__read_running_kernel_build_id(kernel, machine); + } + } return kernel; } @@ -700,8 +709,19 @@ static void machine__get_kallsyms_filename(struct machine *machine, char *buf, { if (machine__is_default_guest(machine)) scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms); - else - scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir); + else { + if (symbol_conf.vmlinux_name != 0) { + unsigned char build_id[BUILD_ID_SIZE]; + char build_id_hex[SBUILD_ID_SIZE]; + filename__read_build_id(symbol_conf.vmlinux_name, + build_id, + sizeof(build_id)); + build_id__sprintf(build_id,sizeof(build_id), build_id_hex); + build_id__filename((char *)build_id_hex,buf,bufsz); + } else { + scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir); + } + } } const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; @@ -710,7 +730,7 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; * Returns the name of the start symbol in *symbol_name. 
Pass in NULL as * symbol_name if it's not that important. */ -static u64 machine__get_running_kernel_start(struct machine *machine, +static u64 machine__get_kallsyms_kernel_start(struct machine *machine, const char **symbol_name) { char filename[PATH_MAX]; @@ -738,7 +758,7 @@ static u64 machine__get_running_kernel_start(struct machine *machine, int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { enum map_type type; - u64 start = machine__get_running_kernel_start(machine, NULL); + u64 start = machine__get_kallsyms_kernel_start(machine, NULL); for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; @@ -1083,7 +1103,8 @@ int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); const char *name; - u64 addr = machine__get_running_kernel_start(machine, &name); + u64 addr = machine__get_kallsyms_kernel_start(machine, &name); + if (!addr) return -1; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index a8e825fca42a..df49c0035170 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -806,6 +806,8 @@ static void python_process_general_event(struct perf_sample *sample, PyInt_FromLong(sample->cpu)); pydict_set_item_string_decref(dict_sample, "ip", PyLong_FromUnsignedLongLong(sample->ip)); + pydict_set_item_string_decref(dict_sample, "addr", + PyLong_FromUnsignedLongLong(sample->addr)); pydict_set_item_string_decref(dict_sample, "time", PyLong_FromUnsignedLongLong(sample->time)); pydict_set_item_string_decref(dict_sample, "period", From 69f28417978e8d4a35c36275340170aa31458e09 Mon Sep 17 00:00:00 2001 From: tor-jeremiassen Date: Wed, 17 Feb 2016 10:58:21 -0600 Subject: [PATCH 17/23] perf scripts: Add python scripts for CoreSight traces Example scripts for CoreSight trace processing with perf script. 
Signed-off-by: Tor Jeremiassen --- tools/perf/scripts/python/cs-trace-disasm.py | 122 +++++++++++++++++++ tools/perf/scripts/python/cs-trace-ranges.py | 44 +++++++ 2 files changed, 166 insertions(+) create mode 100644 tools/perf/scripts/python/cs-trace-disasm.py create mode 100644 tools/perf/scripts/python/cs-trace-ranges.py diff --git a/tools/perf/scripts/python/cs-trace-disasm.py b/tools/perf/scripts/python/cs-trace-disasm.py new file mode 100644 index 000000000000..175ae4bfa732 --- /dev/null +++ b/tools/perf/scripts/python/cs-trace-disasm.py @@ -0,0 +1,122 @@ +# perf script event handlers, generated by perf script -g python +# Licensed under the terms of the GNU GPL License version 2 + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the format files. Those fields not available as handler params can +# be retrieved using Python functions of the form common_*(context). +# See the perf-trace-python Documentation for the list of available functions. 
+ +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from subprocess import * +from Core import * +import re; + +from optparse import OptionParser + +# +# Add options to specify vmlinux file and the objdump executable +# +parser = OptionParser() +parser.add_option("-k", "--vmlinux", dest="vmlinux_name", + help="path to vmlinux file") +parser.add_option("-d", "--objdump", dest="objdump_name", + help="name of objdump executable (in path)") +(options, args) = parser.parse_args() + +if (options.objdump_name == None): + sys.exit("No objdump executable specified - use -d or --objdump option") + +# initialize global dicts and regular expression + +build_ids = dict(); +mmaps = dict(); +disasm_cache = dict(); +disasm_re = re.compile("^\s*([0-9a-fA-F]+):") + +cache_size = 16*1024 + +def trace_begin(): + cmd_output = check_output(["perf", "buildid-list"]).split('\n'); + bid_re = re.compile("([a-fA-f0-9]+)[ \t]([^ \n]+)") + for line in cmd_output: + m = bid_re.search(line) + if (m != None) : + build_ids[m.group(2)] = \ + os.environ['PERF_BUILDID_DIR'] + \ + m.group(2) + "/" + m.group(1); + + if ((options.vmlinux_name != None) and ("[kernel.kallsyms]" in build_ids)): + build_ids['[kernel.kallsyms]'] = options.vmlinux_name; + else: + del build_ids['[kernel.kallsyms]'] + + mmap_re = re.compile("PERF_RECORD_MMAP2 -?[0-9]+/[0-9]+: \[(0x[0-9a-fA-F]+).*:\s.*\s(.*.so)") + cmd_output= check_output("perf script --show-mmap-events | fgrep PERF_RECORD_MMAP2",shell=True).split('\n') + for line in cmd_output: + m = mmap_re.search(line) + if (m != None) : + mmaps[m.group(2)] = int(m.group(1),0) + + + +def trace_end(): + pass + +def process_event(t): + global cache_size + global options + + sample = t['sample'] + dso = t['dso'] + + # don't let the cache get too big, but don't bother with a fancy replacement policy + # just clear it when it hits max size + + if (len(disasm_cache) > 
cache_size): + disasm_cache.clear(); + + addr_range = format(sample['ip'],"x") + ":" + format(sample['addr'],"x"); + + try: + disasm_output = disasm_cache[addr_range]; + except: + try: + fname = build_ids[dso]; + except KeyError: + if (dso == '[kernel.kallsyms]'): + return; + fname = dso; + + if (dso in mmaps): + offset = mmaps[dso]; + disasm = [options.objdump_name,"-d","-z", "--adjust-vma="+format(offset,"#x"),"--start-address="+format(sample['ip'],"#x"),"--stop-address="+format(sample['addr'],"#x"), fname] + else: + offset = 0 + disasm = [options.objdump_name,"-d","-z", "--start-address="+format(sample['ip'],"#x"),"--stop-address="+format(sample['addr'],"#x"),fname] + disasm_output = check_output(disasm).split('\n') + disasm_cache[addr_range] = disasm_output; + + for line in disasm_output: + m = disasm_re.search(line) + if (m != None) : + try: + print "\t",line + except: + exit(1); + else: + continue; + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) + +def print_header(event_name, cpu, secs, nsecs, pid, comm): + print "print_header" + print "%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), diff --git a/tools/perf/scripts/python/cs-trace-ranges.py b/tools/perf/scripts/python/cs-trace-ranges.py new file mode 100644 index 000000000000..c8edacba0f83 --- /dev/null +++ b/tools/perf/scripts/python/cs-trace-ranges.py @@ -0,0 +1,44 @@ +# +# Copyright(C) 2016 Linaro Limited. All rights reserved. +# Author: Tor Jeremiassen +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 as published by +# the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * + +def trace_begin(): + pass; + +def trace_end(): + pass + +def process_event(t): + + sample = t['sample'] + + print "range:",format(sample['ip'],"x"),"-",format(sample['addr'],"x") + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) + +def print_header(event_name, cpu, secs, nsecs, pid, comm): + print "print_header" + print "%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), From cda8131df1fae7efc2d56967b2cc45ddf52c85b9 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 3 May 2016 13:26:08 -0600 Subject: [PATCH 18/23] perf tools: new naming convention for openCSD The naming convention for the openCSD API and header files was changed so that using it was easier. Headers went from "rctdl_xyz.h" to "opencsd_xyz.h" while internal symbols went from "rctdl_" to "ocsd_". 
Signed-off-by: Mathieu Poirier --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 138 +++++++++--------- 1 file changed, 70 insertions(+), 68 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 2fef19922fdd..2cc7ccd3c22a 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -21,8 +21,8 @@ #include "cs-etm-decoder.h" #include "../util.h" -#include "c_api/rctdl_c_api.h" -#include "rctdl_if_types.h" +#include "c_api/opencsd_c_api.h" +#include "ocsd_if_types.h" #include "etmv4/trc_pkt_types_etmv4.h" #define MAX_BUFFER 1024 @@ -35,7 +35,7 @@ struct cs_etm_decoder dcd_tree_handle_t dcd_tree; void (*packet_printer)(const char *); cs_etm_mem_cb_type mem_access; - rctdl_datapath_resp_t prev_return; + ocsd_datapath_resp_t prev_return; size_t prev_processed; bool trace_on; bool discontinuity; @@ -47,8 +47,8 @@ struct cs_etm_decoder }; static uint32_t cs_etm_decoder__mem_access(const void *context, - const rctdl_vaddr_t address, - const rctdl_mem_space_acc_t mem_space, + const ocsd_vaddr_t address, + const ocsd_mem_space_acc_t mem_space, const uint32_t req_size, uint8_t *buffer) { @@ -59,7 +59,7 @@ static uint32_t cs_etm_decoder__mem_access(const void *context, } static int cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, - rctdl_etmv4_cfg *config) + ocsd_etmv4_cfg *config) { config->reg_configr = params->reg_configr; config->reg_traceidr = params->reg_traceidr; @@ -100,7 +100,9 @@ int cs_etm_decoder__flush(struct cs_etm_decoder *decoder) return cs_etm_decoder__flush_packet(decoder); } -static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, const rctdl_generic_trace_elem *elem, enum cs_etm_sample_type sample_type) +static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + enum cs_etm_sample_type sample_type) { int err = 0; uint32_t et = 0; @@ 
-149,48 +151,48 @@ static int cs_etm_decoder__mark_exception_return(struct cs_etm_decoder *decoder) return err; } -static rctdl_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( +static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, - const rctdl_trc_index_t indx, + const ocsd_trc_index_t indx, const uint8_t trace_chan_id, - const rctdl_generic_trace_elem *elem) + const ocsd_generic_trace_elem *elem) { - rctdl_datapath_resp_t resp = RCTDL_RESP_CONT; + ocsd_datapath_resp_t resp = OCSD_RESP_CONT; struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; (void) indx; (void) trace_chan_id; switch (elem->elem_type) { - case RCTDL_GEN_TRC_ELEM_UNKNOWN: + case OCSD_GEN_TRC_ELEM_UNKNOWN: break; - case RCTDL_GEN_TRC_ELEM_NO_SYNC: + case OCSD_GEN_TRC_ELEM_NO_SYNC: decoder->trace_on = false; break; - case RCTDL_GEN_TRC_ELEM_TRACE_ON: + case OCSD_GEN_TRC_ELEM_TRACE_ON: decoder->trace_on = true; break; - //case RCTDL_GEN_TRC_ELEM_TRACE_OVERFLOW: + //case OCSD_GEN_TRC_ELEM_TRACE_OVERFLOW: //decoder->trace_on = false; //decoder->discontinuity = true; //break; - case RCTDL_GEN_TRC_ELEM_INSTR_RANGE: + case OCSD_GEN_TRC_ELEM_INSTR_RANGE: cs_etm_decoder__buffer_packet(decoder,elem, CS_ETM_RANGE); - resp = RCTDL_RESP_WAIT; + resp = OCSD_RESP_WAIT; break; - case RCTDL_GEN_TRC_ELEM_EXCEPTION: + case OCSD_GEN_TRC_ELEM_EXCEPTION: cs_etm_decoder__mark_exception(decoder); break; - case RCTDL_GEN_TRC_ELEM_EXCEPTION_RET: + case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: cs_etm_decoder__mark_exception_return(decoder); break; - case RCTDL_GEN_TRC_ELEM_PE_CONTEXT: - case RCTDL_GEN_TRC_ELEM_EO_TRACE: - case RCTDL_GEN_TRC_ELEM_ADDR_NACC: - case RCTDL_GEN_TRC_ELEM_TIMESTAMP: - case RCTDL_GEN_TRC_ELEM_CYCLE_COUNT: - //case RCTDL_GEN_TRC_ELEM_TS_WITH_CC: - case RCTDL_GEN_TRC_ELEM_EVENT: + case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + case OCSD_GEN_TRC_ELEM_EO_TRACE: + case OCSD_GEN_TRC_ELEM_ADDR_NACC: + case OCSD_GEN_TRC_ELEM_TIMESTAMP: + case 
OCSD_GEN_TRC_ELEM_CYCLE_COUNT: + //case OCSD_GEN_TRC_ELEM_TS_WITH_CC: + case OCSD_GEN_TRC_ELEM_EVENT: default: break; } @@ -200,14 +202,14 @@ static rctdl_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( return resp; } -static rctdl_datapath_resp_t cs_etm_decoder__etmv4i_packet_printer( +static ocsd_datapath_resp_t cs_etm_decoder__etmv4i_packet_printer( const void *context, - const rctdl_datapath_op_t op, - const rctdl_trc_index_t indx, - const rctdl_etmv4_i_pkt *pkt) + const ocsd_datapath_op_t op, + const ocsd_trc_index_t indx, + const ocsd_etmv4_i_pkt *pkt) { const size_t PACKET_STR_LEN = 1024; - rctdl_datapath_resp_t ret = RCTDL_RESP_CONT; + ocsd_datapath_resp_t ret = OCSD_RESP_CONT; char packet_str[PACKET_STR_LEN]; size_t offset; struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; @@ -216,18 +218,18 @@ static rctdl_datapath_resp_t cs_etm_decoder__etmv4i_packet_printer( offset = strlen(packet_str); switch(op) { - case RCTDL_OP_DATA: - if (rctdl_pkt_str(RCTDL_PROTOCOL_ETMV4I, + case OCSD_OP_DATA: + if (ocsd_pkt_str(OCSD_PROTOCOL_ETMV4I, (void *)pkt, packet_str+offset, - PACKET_STR_LEN-offset) != RCTDL_OK) - ret = RCTDL_RESP_FATAL_INVALID_PARAM; + PACKET_STR_LEN-offset) != OCSD_OK) + ret = OCSD_RESP_FATAL_INVALID_PARAM; break; - case RCTDL_OP_EOT: + case OCSD_OP_EOT: sprintf(packet_str,"**** END OF TRACE ****\n"); break; - case RCTDL_OP_FLUSH: - case RCTDL_OP_RESET: + case OCSD_OP_FLUSH: + case OCSD_OP_RESET: default: break; } @@ -241,7 +243,7 @@ static int cs_etm_decoder__create_etmv4i_packet_printer(struct cs_etm_decoder_pa struct cs_etm_decoder *decoder) { - rctdl_etmv4_cfg trace_config; + ocsd_etmv4_cfg trace_config; int ret = 0; if (d_params->packet_printer == NULL) @@ -254,7 +256,7 @@ static int cs_etm_decoder__create_etmv4i_packet_printer(struct cs_etm_decoder_pa decoder->packet_printer = d_params->packet_printer; - ret = rctdl_dt_create_etmv4i_pkt_proc(decoder->dcd_tree, + ret = ocsd_dt_create_etmv4i_pkt_proc(decoder->dcd_tree, 
&trace_config, cs_etm_decoder__etmv4i_packet_printer, decoder); @@ -265,7 +267,7 @@ static int cs_etm_decoder__create_etmv4i_packet_printer(struct cs_etm_decoder_pa static int cs_etm_decoder__create_etmv4i_packet_decoder(struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params *t_params, struct cs_etm_decoder *decoder) { - rctdl_etmv4_cfg trace_config; + ocsd_etmv4_cfg trace_config; int ret = 0; decoder->packet_printer = d_params->packet_printer; @@ -274,12 +276,12 @@ static int cs_etm_decoder__create_etmv4i_packet_decoder(struct cs_etm_decoder_pa if (ret != 0) return -1; - ret = rctdl_dt_create_etmv4i_decoder(decoder->dcd_tree,&trace_config); + ret = ocsd_dt_create_etmv4i_decoder(decoder->dcd_tree,&trace_config); - if (ret != RCTDL_OK) + if (ret != OCSD_OK) return -1; - ret = rctdl_dt_set_gen_elem_outfn(decoder->dcd_tree, + ret = ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, cs_etm_decoder__gen_trace_elem_printer, decoder); return ret; } @@ -289,10 +291,10 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, uint64_t a int err; decoder->mem_access = cb_func; - err = rctdl_dt_add_callback_mem_acc(decoder->dcd_tree, + err = ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, address, address+len-1, - RCTDL_MEM_SPACE_ANY, + OCSD_MEM_SPACE_ANY, cs_etm_decoder__mem_access, decoder); return err; @@ -314,10 +316,10 @@ int cs_etm_decoder__add_bin_file(struct cs_etm_decoder *decoder, uint64_t offset region.file_offset = offset; region.start_address = address; region.region_size = len; - err = rctdl_dt_add_binfile_region_mem_acc(decoder->dcd_tree, + err = ocsd_dt_add_binfile_region_mem_acc(decoder->dcd_tree, &region, 1, - RCTDL_MEM_SPACE_ANY, + OCSD_MEM_SPACE_ANY, fname); return err; @@ -330,7 +332,7 @@ const struct cs_etm_state *cs_etm_decoder__process_data_block(struct cs_etm_deco size_t *consumed) { int ret = 0; - rctdl_datapath_resp_t dp_ret = decoder->prev_return; + ocsd_datapath_resp_t dp_ret = decoder->prev_return; size_t processed = 0; if 
(decoder->packet_count > 0) { @@ -341,19 +343,19 @@ const struct cs_etm_state *cs_etm_decoder__process_data_block(struct cs_etm_deco while ((processed < len) && (0 == ret)) { - if (RCTDL_DATA_RESP_IS_CONT(dp_ret)) { + if (OCSD_DATA_RESP_IS_CONT(dp_ret)) { uint32_t count; - dp_ret = rctdl_dt_process_data(decoder->dcd_tree, - RCTDL_OP_DATA, + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_DATA, indx+processed, len - processed, &buf[processed], &count); processed += count; - } else if (RCTDL_DATA_RESP_IS_WAIT(dp_ret)) { - dp_ret = rctdl_dt_process_data(decoder->dcd_tree, - RCTDL_OP_FLUSH, + } else if (OCSD_DATA_RESP_IS_WAIT(dp_ret)) { + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_FLUSH, 0, 0, NULL, @@ -363,12 +365,12 @@ const struct cs_etm_state *cs_etm_decoder__process_data_block(struct cs_etm_deco ret = -1; } } - if (RCTDL_DATA_RESP_IS_WAIT(dp_ret)) { - if (RCTDL_DATA_RESP_IS_CONT(decoder->prev_return)) { + if (OCSD_DATA_RESP_IS_WAIT(dp_ret)) { + if (OCSD_DATA_RESP_IS_CONT(decoder->prev_return)) { decoder->prev_processed = processed; } processed = 0; - } else if (RCTDL_DATA_RESP_IS_WAIT(decoder->prev_return)) { + } else if (OCSD_DATA_RESP_IS_WAIT(decoder->prev_return)) { processed = decoder->prev_processed; decoder->prev_processed = 0; } @@ -413,7 +415,7 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) struct cs_etm_decoder *cs_etm_decoder__new(uint32_t num_cpu, struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params t_params[]) { struct cs_etm_decoder *decoder; - rctdl_dcd_tree_src_t format; + ocsd_dcd_tree_src_t format; uint32_t flags; int ret; size_t i; @@ -429,17 +431,17 @@ struct cs_etm_decoder *cs_etm_decoder__new(uint32_t num_cpu, struct cs_etm_decod } decoder->state.data = d_params->data; - decoder->prev_return = RCTDL_RESP_CONT; + decoder->prev_return = OCSD_RESP_CONT; cs_etm_decoder__clear_buffer(decoder); - format = (d_params->formatted ? 
RCTDL_TRC_SRC_FRAME_FORMATTED : - RCTDL_TRC_SRC_SINGLE); + format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : + OCSD_TRC_SRC_SINGLE); flags = 0; - flags |= (d_params->fsyncs ? RCTDL_DFRMTR_HAS_FSYNCS : 0); - flags |= (d_params->hsyncs ? RCTDL_DFRMTR_HAS_HSYNCS : 0); - flags |= (d_params->frame_aligned ? RCTDL_DFRMTR_FRAME_MEM_ALIGN : 0); + flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0); + flags |= (d_params->hsyncs ? OCSD_DFRMTR_HAS_HSYNCS : 0); + flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0); /* Create decode tree for the data source */ - decoder->dcd_tree = rctdl_create_dcd_tree(format,flags); + decoder->dcd_tree = ocsd_create_dcd_tree(format,flags); if (decoder->dcd_tree == 0) { goto err_free_decoder; @@ -470,7 +472,7 @@ struct cs_etm_decoder *cs_etm_decoder__new(uint32_t num_cpu, struct cs_etm_decod return decoder; err_free_decoder_tree: - rctdl_destroy_dcd_tree(decoder->dcd_tree); + ocsd_destroy_dcd_tree(decoder->dcd_tree); err_free_decoder: free(decoder); return NULL; @@ -481,7 +483,7 @@ void cs_etm_decoder__free(struct cs_etm_decoder *decoder) { if (decoder == NULL) return; - rctdl_destroy_dcd_tree(decoder->dcd_tree); + ocsd_destroy_dcd_tree(decoder->dcd_tree); decoder->dcd_tree = NULL; free(decoder); From 0951a97ed934668b87371c2d3d0c3d21659da4df Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 3 May 2016 13:45:28 -0600 Subject: [PATCH 19/23] perf tools: fixing Makefile problems This patch fixes the ifeq condition used to get the debug or release version of the openCSD libraries. It also fixes a naming typo when release libraries are sought.
Signed-off-by: Mathieu Poirier --- tools/perf/config/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 033d94114bab..405c1c1e2975 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -439,11 +439,11 @@ ifdef CSTRACE_PATH else CSTRACE_LNX = linux endif - ifdef DEBUG + ifeq (${DEBUG}, 1) LIBCSTRACE = -lcstraced_c_api -lcstraced CSTRACE_LIB_PATH = $(CSTRACE_PATH)/lib/$(CSTRACE_LNX)/dbg else - LIBCSTRACE = -lcstrace_c_api -lcstrace + LIBCSTRACE = -lcstraced_c_api -lcstraced CSTRACE_LIB_PATH = $(CSTRACE_PATH)/lib/$(CSTRACE_LNX)/rel endif $(call detected,CSTRACE) From c14318faac87ba59c2ec8279e6328c774af3b723 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 16 May 2016 16:55:55 -0600 Subject: [PATCH 20/23] cs-etm: avoid casting variable Because of two's complement representation, casting an int to an unsigned value doesn't simply get rid of the negative sign. As such a value of -1 becomes 0xFFFFFFFF, which is clearly not the desired effect. This patch deals with cases when @cpu has the value of -1. In those cases queue '0' is initially selected.
Signed-off-by: Mathieu Poirier --- tools/perf/util/cs-etm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 0e9a0d07f03a..1c7208840823 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -938,10 +938,12 @@ static struct cs_etm_queue *cs_etm__cpu_to_etmq(struct cs_etm_auxtrace *etm, { unsigned q,j; - if (/*(cpu < 0) ||*/ (0 == etm->queues.nr_queues)) + if (etm->queues.nr_queues == 0) return NULL; - if ((unsigned) cpu >= etm->queues.nr_queues) + if (cpu < 0) + q = 0; + else if ((unsigned) cpu >= etm->queues.nr_queues) q = etm->queues.nr_queues - 1; else q = cpu; From 7b4fe2ff5a850c70174381adbcf4fd32e41a969c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 18 May 2016 12:58:26 -0600 Subject: [PATCH 21/23] cs-etm: account for each trace buffer in the queue Function cs_etm__get_trace() picks up a single buffer from the current queue. As such, when multiple buffers are present in the queue, several iterations of the fetch-decode block need to be run in order to process all the trace data.
Signed-off-by: Mathieu Poirier --- tools/perf/util/cs-etm.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1c7208840823..a28170eac18a 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -326,7 +326,6 @@ static int cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *et buff->len = sizeof(cstrace); buff->buf = cstrace; */ - etmq->stop = true; buff->ref_timestamp = aux_buffer->reference; @@ -339,10 +338,7 @@ static int cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *et if (etmq->step_through_buffers) etmq->stop = true; - if (buff->len == 0) - return cs_etm__get_trace(buff,etmq); - - return 0; + return buff->len; } static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, @@ -819,12 +815,16 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp) { - struct cs_etm_buffer buffer = {.buf = 0,}; - size_t buffer_used = 0; + struct cs_etm_buffer buffer; + size_t buffer_used; int err = 0; + /* Go through each buffer in the queue and decode them one by one */ +more: + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); err = cs_etm__get_trace(&buffer,etmq); - if (err) + if (err <= 0) return err; do { @@ -842,6 +842,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp) cs_etm__sample(etmq); } } while (!etmq->eot && (buffer.len > buffer_used)); +goto more; (void) timestamp; From fb91616a5e3c41d308cea8efc110d38de9055d9c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Fri, 20 May 2016 11:35:25 -0600 Subject: [PATCH 22/23] cs-etm: removing unnecessary structure field Function cs_etm__sample() is called only from cs_etm__run_decoder() where cs_etm_queue::have_sample is set to 'true'. As such, checking the value of the variable again in cs_etm__sample() is not needed.
Since the variable isn't used anywhere else, it is also removed from the structure definition. Signed-off-by: Mathieu Poirier --- tools/perf/util/cs-etm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a28170eac18a..a772a6f9a407 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -83,7 +83,6 @@ struct cs_etm_queue { u64 time; u64 timestamp; bool stop; - bool have_sample; struct cs_etm_decoder *decoder; u64 offset; bool eot; @@ -797,11 +796,6 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) //struct cs_etm_auxtrace *etm = etmq->etm; int err; - if (!etmq->have_sample) - return 0; - - etmq->have_sample = false; - err = cs_etm_decoder__get_packet(etmq->decoder,&packet); // if there is no sample, it returns err = -1, no real error @@ -837,10 +831,8 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq, u64 *timestamp) err = etmq->state->err; etmq->offset += processed; buffer_used += processed; - if (!err) { - etmq->have_sample = true; + if (!err) cs_etm__sample(etmq); - } } while (!etmq->eot && (buffer.len > buffer_used)); goto more; From 09e1b6ffeb1b6706c441f9369be027e7ad5d56c1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 26 May 2016 10:31:47 -0600 Subject: [PATCH 23/23] cs-etm: associating output packet with CPU they executed on This patch adds the required mechanism to quickly look up the CPU number associated with a traceID. That way the CPU that executed the code conveyed by a decoded packet can be identified, without having to do unnecessary translations.
Using this new functionality the "cs-trace-disasm.py" script is enhanced to output the file and CPU number the code has been executed on: FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab57fd80: 910003e0 mov x0, sp 7fab57fd84: 94000d53 bl 7fab5832d0 FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab5832d0: d11203ff sub sp, sp, #0x480 FILE: /lib/aarch64-linux-gnu/ld-2.21.so CPU: 3 7fab5832d4: a9ba7bfd stp x29, x30, [sp,#-96]! 7fab5832d8: 910003fd mov x29, sp 7fab5832dc: a90363f7 stp x23, x24, [sp,#48] 7fab5832e0: 9101e3b7 add x23, x29, #0x78 7fab5832e4: a90573fb stp x27, x28, [sp,#80] 7fab5832e8: a90153f3 stp x19, x20, [sp,#16] 7fab5832ec: aa0003fb mov x27, x0 7fab5832f0: 910a82e1 add x1, x23, #0x2a0 7fab5832f4: a9025bf5 stp x21, x22, [sp,#32] 7fab5832f8: a9046bf9 stp x25, x26, [sp,#64] 7fab5832fc: 910102e0 add x0, x23, #0x40 7fab583300: f800841f str xzr, [x0],#8 7fab583304: eb01001f cmp x0, x1 7fab583308: 54ffffc1 b.ne 7fab583300 Signed-off-by: Mathieu Poirier --- tools/perf/scripts/python/cs-trace-disasm.py | 2 + .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 15 +++- .../perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + tools/perf/util/cs-etm.c | 70 ++++++++++++++++++- tools/perf/util/cs-etm.h | 7 ++ 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/tools/perf/scripts/python/cs-trace-disasm.py b/tools/perf/scripts/python/cs-trace-disasm.py index 175ae4bfa732..429d0d2d7a23 100644 --- a/tools/perf/scripts/python/cs-trace-disasm.py +++ b/tools/perf/scripts/python/cs-trace-disasm.py @@ -82,6 +82,7 @@ def process_event(t): if (len(disasm_cache) > cache_size): disasm_cache.clear(); + cpu = format(sample['cpu'], "d"); addr_range = format(sample['ip'],"x") + ":" + format(sample['addr'],"x"); try: @@ -103,6 +104,7 @@ def process_event(t): disasm_output = check_output(disasm).split('\n') disasm_cache[addr_range] = disasm_output; + print "FILE: %s\tCPU: %s" % (dso, cpu); for line in disasm_output: m = disasm_re.search(line) if (m != None) : diff --git 
a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 2cc7ccd3c22a..c6f23d63590d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -16,10 +16,13 @@ * with this program. If not, see . */ +#include #include +#include "../cs-etm.h" #include "cs-etm-decoder.h" #include "../util.h" +#include "../util/intlist.h" #include "c_api/opencsd_c_api.h" #include "ocsd_if_types.h" @@ -102,10 +105,12 @@ int cs_etm_decoder__flush(struct cs_etm_decoder *decoder) static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id, enum cs_etm_sample_type sample_type) { int err = 0; uint32_t et = 0; + struct int_node *inode = NULL; if (decoder == NULL) return -1; @@ -116,12 +121,18 @@ static int cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, if (err) return err; et = decoder->end_tail; + /* Search the RB tree for the cpu associated with this traceID */ + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return PTR_ERR(inode); decoder->packet_buffer[et].sample_type = sample_type; decoder->packet_buffer[et].start_addr = elem->st_addr; decoder->packet_buffer[et].end_addr = elem->en_addr; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; + decoder->packet_buffer[et].cpu = *((int*)inode->priv); + et = (et + 1) & (MAX_BUFFER - 1); decoder->end_tail = et; @@ -177,7 +188,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( //decoder->discontinuity = true; //break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - cs_etm_decoder__buffer_packet(decoder,elem, CS_ETM_RANGE); + cs_etm_decoder__buffer_packet(decoder,elem, + trace_chan_id, CS_ETM_RANGE); resp = OCSD_RESP_WAIT; break; case OCSD_GEN_TRC_ELEM_EXCEPTION: @@ -409,6 +421,7 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) 
decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; decoder->packet_buffer[i].exc = false; decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; } } diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 2d1013bcdbab..38c5ae84ed07 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -51,6 +51,7 @@ struct cs_etm_packet { uint64_t end_addr; bool exc; bool exc_ret; + int cpu; }; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a772a6f9a407..ca93257a6cb7 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -15,6 +15,7 @@ * this program. If not, see . */ +#include #include #include #include @@ -29,6 +30,7 @@ #include "evlist.h" #include "machine.h" #include "util.h" +#include "util/intlist.h" #include "color.h" #include "cs-etm.h" #include "cs-etm-decoder/cs-etm-decoder.h" @@ -234,12 +236,20 @@ static void cs_etm__free(struct perf_session *session) { size_t i; + struct int_node *inode, *tmp; struct cs_etm_auxtrace *aux = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); auxtrace_heap__free(&aux->heap); cs_etm__free_events(session); session->auxtrace = NULL; + + /* First remove all traceID/CPU# nodes from the RB tree */ + intlist__for_each_safe(inode, tmp, traceid_list) + intlist__remove(traceid_list, inode); + /* Then the RB tree itself */ + intlist__delete(traceid_list); + //thread__delete(aux->unknown_thread); for (i = 0; i < aux->num_cpu; ++i) { zfree(&aux->metadata[i]); @@ -613,7 +623,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id; sample.period = (end_addr - start_addr) >> 2; - sample.cpu = etmq->cpu; + sample.cpu = packet->cpu; sample.flags = 0; // etmq->flags; sample.insn_len = 1; // etmq->insn_len; @@ -1326,12 +1336,19 @@ int 
cs_etm__process_auxtrace_info(union perf_event *event, size_t priv_size = 0; size_t num_cpu; struct cs_etm_auxtrace *etm = 0; - int err = 0; + int err = 0, idx = -1; u64 *ptr; u64 *hdr = NULL; u64 **metadata = NULL; size_t i,j,k; unsigned pmu_type; + struct int_node *inode; + + /* + * sizeof(auxtrace_info_event::type) + + * sizeof(auxtrace_info_event::reserved) == 8 + */ + info_header_size = 8; if (total_size < (event_header_size + info_header_size)) return -EINVAL; @@ -1355,7 +1372,20 @@ int cs_etm__process_auxtrace_info(union perf_event *event, return -EINVAL; } + /* + * Create an RB tree for traceID-CPU# tuple. Since the conversion has + * to be made for each packet that gets decoded optimizing access in + * anything other than a sequential array is worth doing. + */ + traceid_list = intlist__new(NULL); + if (!traceid_list) + return -ENOMEM; + metadata = zalloc(sizeof(u64 *) * num_cpu); + if (!metadata) { + err = -ENOMEM; + goto err_free_traceid_list; + } if (metadata == NULL) { return -EINVAL; @@ -1369,6 +1399,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event, for (k = 0; k < CS_ETM_PRIV_MAX; k++) { metadata[j][k] = ptr[i+k]; } + + /* The traceID is our handle */ + idx = metadata[j][CS_ETM_ETMIDR]; i += CS_ETM_PRIV_MAX; } else if (ptr[i] == __perf_cs_etmv4_magic) { metadata[j] = zalloc(sizeof(u64)*CS_ETMV4_PRIV_MAX); @@ -1377,8 +1410,33 @@ int cs_etm__process_auxtrace_info(union perf_event *event, for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) { metadata[j][k] = ptr[i+k]; } + + /* The traceID is our handle */ + idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; i += CS_ETMV4_PRIV_MAX; } + + /* Get an RB node for this CPU */ + inode = intlist__findnew(traceid_list, idx); + + /* Something went wrong, no need to continue */ + if (!inode) { + err = PTR_ERR(inode); + goto err_free_metadata; + } + + /* + * The node for that CPU should not have been taken already. + * Backout if that's the case. 
+ */ + if (inode->priv) { + err = -EINVAL; + goto err_free_metadata; + } + + /* All good, associate the traceID with the CPU# */ + inode->priv = &metadata[j][CS_ETM_CPU]; + } if (i*8 != priv_size) @@ -1463,5 +1521,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event, session->auxtrace = NULL; err_free: free(etm); +err_free_metadata: + /* No need to check @metadata[j], free(NULL) is supported */ + for (j = 0; j < num_cpu; ++j) + free(metadata[j]); + free(metadata); +err_free_traceid_list: + intlist__delete(traceid_list); + return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 7b683a024690..ec6ff78f1905 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -18,6 +18,10 @@ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ #define INCLUDE__UTIL_PERF_CS_ETM_H__ +#include "util/event.h" +#include "util/intlist.h" +#include "util/session.h" + /* Versionning header in case things need tro change in the future. That way * decoding of old snapshot is still possible. */ @@ -61,6 +65,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024)